feat: add Kiro OAuth web, rate limiter, metrics, fingerprint, background refresh and model converter

This commit is contained in:
781456868@qq.com
2026-01-18 15:04:29 +08:00
parent 93d7883513
commit 0e77e93e5d
37 changed files with 10396 additions and 282 deletions

View File

@@ -7,13 +7,16 @@ import (
"encoding/base64"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"os"
"path/filepath"
"strings"
"sync"
"syscall"
"time"
"github.com/google/uuid"
@@ -53,9 +56,28 @@ const (
kiroIDEUserAgent = "aws-sdk-js/1.0.18 ua/2.1 os/darwin#25.0.0 lang/js md/nodejs#20.16.0 api/codewhispererstreaming#1.0.18 m/E KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
kiroIDEAmzUserAgent = "aws-sdk-js/1.0.18 KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
kiroIDEAgentModeSpec = "spec"
kiroAgentModeVibe = "vibe"
// Socket retry configuration constants (based on kiro2Api reference implementation)
// Maximum number of retry attempts for socket/network errors
kiroSocketMaxRetries = 3
// Base delay between retry attempts (uses exponential backoff: delay * 2^attempt)
kiroSocketBaseRetryDelay = 1 * time.Second
// Maximum delay between retry attempts (cap for exponential backoff)
kiroSocketMaxRetryDelay = 30 * time.Second
// First token timeout for streaming responses (how long to wait for first response)
kiroFirstTokenTimeout = 15 * time.Second
// Streaming read timeout (how long to wait between chunks)
kiroStreamingReadTimeout = 300 * time.Second
)
// retryableHTTPStatusCodes defines HTTP status codes that are considered retryable.
// Based on kiro2Api reference: 502 (Bad Gateway), 503 (Service Unavailable), 504 (Gateway Timeout).
// Deliberately excludes 500/501: those usually indicate a request-level problem
// that a retry will not fix, so callers fall back to their generic backoff path.
var retryableHTTPStatusCodes = map[int]bool{
	502: true, // Bad Gateway - upstream server error
	503: true, // Service Unavailable - server temporarily overloaded
	504: true, // Gateway Timeout - upstream server timeout
}
// Real-time usage estimation configuration
// These control how often usage updates are sent during streaming
var (
@@ -63,6 +85,241 @@ var (
usageUpdateTimeInterval = 15 * time.Second // Or every 15 seconds, whichever comes first
)
// Global FingerprintManager for dynamic User-Agent generation per token
// Each token gets a unique fingerprint on first use, which is cached for subsequent requests
var (
globalFingerprintManager *kiroauth.FingerprintManager
globalFingerprintManagerOnce sync.Once
)
// getGlobalFingerprintManager lazily creates and returns the process-wide
// FingerprintManager used to derive a per-token User-Agent fingerprint.
// Initialization happens exactly once and is safe for concurrent callers
// via sync.Once.
func getGlobalFingerprintManager() *kiroauth.FingerprintManager {
	globalFingerprintManagerOnce.Do(func() {
		mgr := kiroauth.NewFingerprintManager()
		globalFingerprintManager = mgr
		log.Infof("kiro: initialized global FingerprintManager for dynamic UA generation")
	})
	return globalFingerprintManager
}
// retryConfig holds configuration for socket retry logic.
// Based on kiro2Api Python implementation patterns.
// Populated by defaultRetryConfig; not currently loaded from user config.
type retryConfig struct {
	MaxRetries      int           // Maximum number of retry attempts for a single endpoint
	BaseDelay       time.Duration // Base delay between retries (doubled each attempt)
	MaxDelay        time.Duration // Upper cap applied to the exponential backoff
	RetryableErrors []string      // Lowercase substrings matched against err.Error() (see isRetryableError)
	RetryableStatus map[int]bool  // HTTP status codes to retry (502/503/504)
	FirstTokenTmout time.Duration // Timeout for the first token in a streaming response
	StreamReadTmout time.Duration // Timeout between successive stream chunks
}
// defaultRetryConfig returns the default retry configuration for Kiro socket
// operations. Numeric values come from the kiroSocket* constants; the status
// map is the shared retryableHTTPStatusCodes table.
func defaultRetryConfig() retryConfig {
	return retryConfig{
		MaxRetries:      kiroSocketMaxRetries,
		BaseDelay:       kiroSocketBaseRetryDelay,
		MaxDelay:        kiroSocketMaxRetryDelay,
		RetryableStatus: retryableHTTPStatusCodes,
		// Patterns are matched against strings.ToLower(err.Error()) in
		// isRetryableError, so every entry must be lowercase. The original
		// list contained "EOF", which could never match the lowercased
		// message; fixed to "eof".
		RetryableErrors: []string{
			"connection reset",
			"connection refused",
			"broken pipe",
			"eof",
			"timeout",
			"temporary failure",
			"no such host",
			"network is unreachable",
			"i/o timeout",
		},
		FirstTokenTmout: kiroFirstTokenTimeout,
		StreamReadTmout: kiroStreamingReadTimeout,
	}
}
// isRetryableError reports whether err represents a transient failure worth
// retrying: network timeouts, connection resets/refusals, broken pipes,
// unreachable hosts, and peer-closed connections (EOF).
// Context cancellation and deadline expiry are never retryable - they mean
// the caller gave up, not that the upstream hiccuped.
// Based on kiro2Api's retry logic patterns.
func isRetryableError(err error) bool {
	if err == nil {
		return false
	}
	// Check for context cancellation - not retryable
	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
		return false
	}
	// Check for net.Error (timeout, temporary)
	var netErr net.Error
	if errors.As(err, &netErr) {
		if netErr.Timeout() {
			log.Debugf("kiro: isRetryableError: network timeout detected")
			return true
		}
		// Note: Temporary() is deprecated but still useful for some error types
	}
	// Check for specific syscall errors (connection reset, broken pipe, etc.)
	var syscallErr syscall.Errno
	if errors.As(err, &syscallErr) {
		switch syscallErr {
		case syscall.ECONNRESET: // Connection reset by peer
			log.Debugf("kiro: isRetryableError: ECONNRESET detected")
			return true
		case syscall.ECONNREFUSED: // Connection refused
			log.Debugf("kiro: isRetryableError: ECONNREFUSED detected")
			return true
		case syscall.EPIPE: // Broken pipe
			log.Debugf("kiro: isRetryableError: EPIPE (broken pipe) detected")
			return true
		case syscall.ETIMEDOUT: // Connection timed out
			log.Debugf("kiro: isRetryableError: ETIMEDOUT detected")
			return true
		case syscall.ENETUNREACH: // Network is unreachable
			log.Debugf("kiro: isRetryableError: ENETUNREACH detected")
			return true
		case syscall.EHOSTUNREACH: // No route to host
			log.Debugf("kiro: isRetryableError: EHOSTUNREACH detected")
			return true
		}
	}
	// net.OpError wraps the underlying cause; recurse into it so the typed
	// checks above get a chance at the inner error.
	var opErr *net.OpError
	if errors.As(err, &opErr) {
		log.Debugf("kiro: isRetryableError: net.OpError detected, op=%s", opErr.Op)
		if opErr.Err != nil {
			return isRetryableError(opErr.Err)
		}
		return true
	}
	// Fall back to message matching. The message is lowercased, so lowercase
	// each pattern too - otherwise an uppercase pattern (e.g. "EOF") would
	// silently never match.
	errMsg := strings.ToLower(err.Error())
	cfg := defaultRetryConfig()
	for _, pattern := range cfg.RetryableErrors {
		if strings.Contains(errMsg, strings.ToLower(pattern)) {
			log.Debugf("kiro: isRetryableError: pattern '%s' matched in error: %s", pattern, errMsg)
			return true
		}
	}
	// Bare EOF / unexpected EOF usually means the peer closed the connection mid-exchange.
	if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
		log.Debugf("kiro: isRetryableError: EOF/UnexpectedEOF detected")
		return true
	}
	return false
}
// isRetryableHTTPStatus reports whether statusCode is a retryable server error.
// Per the kiro2Api reference, only 502, 503 and 504 qualify; the lookup table
// is retryableHTTPStatusCodes.
func isRetryableHTTPStatus(statusCode int) bool {
	retryable, known := retryableHTTPStatusCodes[statusCode]
	return known && retryable
}
// calculateRetryDelay calculates the delay for the next retry attempt using exponential backoff.
// delay = min(baseDelay * 2^attempt, maxDelay)
// Jitter is applied by the kiroauth helper to prevent thundering herd
// (reportedly ±30% - confirm against kiroauth.ExponentialBackoffWithJitter).
func calculateRetryDelay(attempt int, cfg retryConfig) time.Duration {
	return kiroauth.ExponentialBackoffWithJitter(attempt, cfg.BaseDelay, cfg.MaxDelay)
}
// logRetryAttempt emits a warning describing an upcoming retry: which attempt
// this is (1-based in the output), why it is happening, how long we will wait,
// and which endpoint is involved.
func logRetryAttempt(attempt, maxRetries int, reason string, delay time.Duration, endpoint string) {
	msg := fmt.Sprintf("kiro: retry attempt %d/%d for %s, waiting %v before next attempt (endpoint: %s)",
		attempt+1, maxRetries, reason, delay, endpoint)
	log.Warnf("%s", msg)
}
// kiroHTTPClientPool provides a shared HTTP client with connection pooling for Kiro API.
// This reduces connection overhead and improves performance for concurrent requests.
// Based on kiro2Api's connection pooling pattern.
var (
kiroHTTPClientPool *http.Client
kiroHTTPClientPoolOnce sync.Once
)
// getKiroPooledHTTPClient returns a shared HTTP client backed by a pooled
// transport. The client is built once (sync.Once) and reused by every
// subsequent caller, which:
//   - avoids repeated TCP/TLS handshakes,
//   - enables HTTP/2 multiplexing where the server supports it,
//   - keeps idle keep-alive connections available for reuse.
func getKiroPooledHTTPClient() *http.Client {
	kiroHTTPClientPoolOnce.Do(func() {
		dialer := &net.Dialer{
			Timeout:   30 * time.Second, // TCP connection timeout
			KeepAlive: 30 * time.Second, // TCP keep-alive interval
		}
		transport := &http.Transport{
			// Pool sizing
			MaxIdleConns:        100, // Max idle connections across all hosts
			MaxIdleConnsPerHost: 20,  // Max idle connections per host
			MaxConnsPerHost:     50,  // Max total connections per host
			IdleConnTimeout:     90 * time.Second,
			// Connection establishment
			DialContext:         dialer.DialContext,
			TLSHandshakeTimeout: 10 * time.Second,
			// Per-request phases
			ResponseHeaderTimeout: 30 * time.Second,
			ExpectContinueTimeout: 1 * time.Second,
			// Prefer HTTP/2 when the server offers it
			ForceAttemptHTTP2: true,
		}
		// Deliberately no Client.Timeout here: individual requests bound
		// themselves via their context.
		kiroHTTPClientPool = &http.Client{Transport: transport}
		log.Debugf("kiro: initialized pooled HTTP client (MaxIdleConns=%d, MaxIdleConnsPerHost=%d, MaxConnsPerHost=%d)",
			transport.MaxIdleConns, transport.MaxIdleConnsPerHost, transport.MaxConnsPerHost)
	})
	return kiroHTTPClientPool
}
// newKiroHTTPClientWithPooling returns an HTTP client for Kiro API calls.
// When a proxy is configured (auth-level first, then config-level) it defers
// to the existing proxy-aware client, which does not pool connections.
// Otherwise it hands out the shared pooled client, optionally wrapped with a
// per-call timeout so the shared client itself is never mutated.
func newKiroHTTPClientWithPooling(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client {
	// Resolve the effective proxy URL: auth setting wins over global config.
	proxyURL := ""
	if auth != nil {
		proxyURL = strings.TrimSpace(auth.ProxyURL)
	}
	if proxyURL == "" && cfg != nil {
		proxyURL = strings.TrimSpace(cfg.ProxyURL)
	}
	if proxyURL != "" {
		log.Debugf("kiro: using proxy-aware HTTP client (proxy=%s)", proxyURL)
		return newProxyAwareHTTPClient(ctx, cfg, auth, timeout)
	}
	shared := getKiroPooledHTTPClient()
	if timeout <= 0 {
		return shared
	}
	// Reuse the pooled transport but apply the caller's timeout on a wrapper
	// client, leaving the shared instance untouched.
	return &http.Client{
		Transport: shared.Transport,
		Timeout:   timeout,
	}
}
// kiroEndpointConfig bundles endpoint URL with its compatible Origin and AmzTarget values.
// This solves the "triple mismatch" problem where different endpoints require matching
// Origin and X-Amz-Target header values.
@@ -99,7 +356,7 @@ var kiroEndpointConfigs = []kiroEndpointConfig{
Name: "CodeWhisperer",
},
{
URL: "https://q.us-east-1.amazonaws.com/generateAssistantResponse",
URL: "https://q.us-east-1.amazonaws.com/",
Origin: "CLI",
AmzTarget: "AmazonQDeveloperStreamingService.SendMessage",
Name: "AmazonQ",
@@ -217,6 +474,29 @@ func NewKiroExecutor(cfg *config.Config) *KiroExecutor {
// Identifier returns the unique identifier for this executor.
func (e *KiroExecutor) Identifier() string { return "kiro" }
// applyDynamicFingerprint applies token-specific fingerprint headers to the request.
// For IDC auth, uses a dynamic fingerprint-based User-Agent generated per token.
// For other auth types, uses static Amazon Q CLI style headers.
func applyDynamicFingerprint(req *http.Request, auth *cliproxyauth.Auth) {
	if isIDCAuth(auth) {
		// Get token-specific fingerprint for dynamic UA generation
		tokenKey := getTokenKey(auth)
		fp := getGlobalFingerprintManager().GetFingerprint(tokenKey)
		// Use fingerprint-generated dynamic User-Agent
		req.Header.Set("User-Agent", fp.BuildUserAgent())
		req.Header.Set("X-Amz-User-Agent", fp.BuildAmzUserAgent())
		req.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
		// Truncate the key defensively for logging: auth IDs may be shorter
		// than 8 bytes, and tokenKey[:8] would panic on them.
		keyPreview := tokenKey
		if len(keyPreview) > 8 {
			keyPreview = keyPreview[:8] + "..."
		}
		log.Debugf("kiro: using dynamic fingerprint for token %s (SDK:%s, OS:%s/%s, Kiro:%s)",
			keyPreview, fp.SDKVersion, fp.OSType, fp.OSVersion, fp.KiroVersion)
	} else {
		// Use static Amazon Q CLI style headers for non-IDC auth.
		req.Header.Set("User-Agent", kiroUserAgent)
		req.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
		// Restore the agent-mode header the pre-refactor code set for non-IDC
		// auth; omitting it changes the request profile the upstream sees.
		req.Header.Set("x-amzn-kiro-agent-mode", kiroAgentModeVibe)
	}
}
// PrepareRequest prepares the HTTP request before execution.
func (e *KiroExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth) error {
if req == nil {
@@ -226,16 +506,10 @@ func (e *KiroExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth
if strings.TrimSpace(accessToken) == "" {
return statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
}
if isIDCAuth(auth) {
req.Header.Set("User-Agent", kiroIDEUserAgent)
req.Header.Set("X-Amz-User-Agent", kiroIDEAmzUserAgent)
req.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
} else {
req.Header.Set("User-Agent", kiroUserAgent)
req.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
req.Header.Set("x-amzn-kiro-agent-mode", kiroAgentModeVibe)
}
req.Header.Set("x-amzn-codewhisperer-optout", "true")
// Apply dynamic fingerprint-based headers
applyDynamicFingerprint(req, auth)
req.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
req.Header.Set("Authorization", "Bearer "+accessToken)
@@ -259,10 +533,23 @@ func (e *KiroExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth,
if errPrepare := e.PrepareRequest(httpReq, auth); errPrepare != nil {
return nil, errPrepare
}
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
httpClient := newKiroHTTPClientWithPooling(ctx, e.cfg, auth, 0)
return httpClient.Do(httpReq)
}
// getTokenKey returns a unique key for rate limiting based on auth credentials.
// Uses auth ID if available, otherwise falls back to a hash of the access token.
// The key is logged verbatim by rate-limit and cooldown messages, so the
// fallback hashes the token (as the contract states) instead of returning a
// raw prefix of the secret, which would leak token material into logs.
func getTokenKey(auth *cliproxyauth.Auth) string {
	if auth != nil && auth.ID != "" {
		return auth.ID
	}
	accessToken, _ := kiroCredentials(auth)
	if accessToken == "" {
		return ""
	}
	sum := sha256.Sum256([]byte(accessToken))
	// 16 hex chars (64 bits) is ample to distinguish the handful of tokens
	// a single proxy instance manages.
	return hex.EncodeToString(sum[:8])
}
// Execute sends the request to Kiro API and returns the response.
// Supports automatic token refresh on 401/403 errors.
func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
@@ -271,6 +558,24 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
return resp, fmt.Errorf("kiro: access token not found in auth")
}
// Rate limiting: get token key for tracking
tokenKey := getTokenKey(auth)
rateLimiter := kiroauth.GetGlobalRateLimiter()
cooldownMgr := kiroauth.GetGlobalCooldownManager()
// Check if token is in cooldown period
if cooldownMgr.IsInCooldown(tokenKey) {
remaining := cooldownMgr.GetRemainingCooldown(tokenKey)
reason := cooldownMgr.GetCooldownReason(tokenKey)
log.Warnf("kiro: token %s is in cooldown (reason: %s), remaining: %v", tokenKey, reason, remaining)
return resp, fmt.Errorf("kiro: token is in cooldown for %v (reason: %s)", remaining, reason)
}
// Wait for rate limiter before proceeding
log.Debugf("kiro: waiting for rate limiter for token %s", tokenKey)
rateLimiter.WaitForToken(tokenKey)
log.Debugf("kiro: rate limiter cleared for token %s", tokenKey)
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
@@ -303,7 +608,7 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
// Execute with retry on 401/403 and 429 (quota exhausted)
// Note: currentOrigin and kiroPayload are built inside executeWithRetry for each endpoint
resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, nil, body, from, to, reporter, "", kiroModelID, isAgentic, isChatOnly)
resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, nil, body, from, to, reporter, "", kiroModelID, isAgentic, isChatOnly, tokenKey)
return resp, err
}
@@ -312,9 +617,12 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
// - Amazon Q endpoint (CLI origin) uses Amazon Q Developer quota
// - CodeWhisperer endpoint (AI_EDITOR origin) uses Kiro IDE quota
// Also supports multi-endpoint fallback similar to Antigravity implementation.
func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, accessToken, profileArn string, kiroPayload, body []byte, from, to sdktranslator.Format, reporter *usageReporter, currentOrigin, kiroModelID string, isAgentic, isChatOnly bool) (cliproxyexecutor.Response, error) {
// tokenKey is used for rate limiting and cooldown tracking.
func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, accessToken, profileArn string, kiroPayload, body []byte, from, to sdktranslator.Format, reporter *usageReporter, currentOrigin, kiroModelID string, isAgentic, isChatOnly bool, tokenKey string) (cliproxyexecutor.Response, error) {
var resp cliproxyexecutor.Response
maxRetries := 2 // Allow retries for token refresh + endpoint fallback
rateLimiter := kiroauth.GetGlobalRateLimiter()
cooldownMgr := kiroauth.GetGlobalCooldownManager()
endpointConfigs := getKiroEndpointConfigs(auth)
var last429Err error
@@ -332,6 +640,12 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
for attempt := 0; attempt <= maxRetries; attempt++ {
// Apply human-like delay before first request (not on retries)
// This mimics natural user behavior patterns
if attempt == 0 && endpointIdx == 0 {
kiroauth.ApplyHumanLikeDelay()
}
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(kiroPayload))
if err != nil {
return resp, err
@@ -342,20 +656,9 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
// Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors)
httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
// Use different headers based on auth type
// IDC auth uses Kiro IDE style headers (from kiro2api)
// Other auth types use Amazon Q CLI style headers
if isIDCAuth(auth) {
httpReq.Header.Set("User-Agent", kiroIDEUserAgent)
httpReq.Header.Set("X-Amz-User-Agent", kiroIDEAmzUserAgent)
httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
log.Debugf("kiro: using Kiro IDE headers for IDC auth")
} else {
httpReq.Header.Set("User-Agent", kiroUserAgent)
httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroAgentModeVibe)
}
httpReq.Header.Set("x-amzn-codewhisperer-optout", "true")
// Apply dynamic fingerprint-based headers
applyDynamicFingerprint(httpReq, auth)
httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
@@ -386,10 +689,34 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
AuthValue: authValue,
})
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 120*time.Second)
httpClient := newKiroHTTPClientWithPooling(ctx, e.cfg, auth, 120*time.Second)
httpResp, err := httpClient.Do(httpReq)
if err != nil {
// Check for context cancellation first - client disconnected, not a server error
// Use 499 (Client Closed Request - nginx convention) instead of 500
if errors.Is(err, context.Canceled) {
log.Debugf("kiro: request canceled by client (context.Canceled)")
return resp, statusErr{code: 499, msg: "client canceled request"}
}
// Check for context deadline exceeded - request timed out
// Return 504 Gateway Timeout instead of 500
if errors.Is(err, context.DeadlineExceeded) {
log.Debugf("kiro: request timed out (context.DeadlineExceeded)")
return resp, statusErr{code: http.StatusGatewayTimeout, msg: "upstream request timed out"}
}
recordAPIResponseError(ctx, e.cfg, err)
// Enhanced socket retry: Check if error is retryable (network timeout, connection reset, etc.)
retryCfg := defaultRetryConfig()
if isRetryableError(err) && attempt < retryCfg.MaxRetries {
delay := calculateRetryDelay(attempt, retryCfg)
logRetryAttempt(attempt, retryCfg.MaxRetries, fmt.Sprintf("socket error: %v", err), delay, endpointConfig.Name)
time.Sleep(delay)
continue
}
return resp, err
}
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
@@ -401,6 +728,12 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
_ = httpResp.Body.Close()
appendAPIResponseChunk(ctx, e.cfg, respBody)
// Record failure and set cooldown for 429
rateLimiter.MarkTokenFailed(tokenKey)
cooldownDuration := kiroauth.CalculateCooldownFor429(attempt)
cooldownMgr.SetCooldown(tokenKey, cooldownDuration, kiroauth.CooldownReason429)
log.Warnf("kiro: rate limit hit (429), token %s set to cooldown for %v", tokenKey, cooldownDuration)
// Preserve last 429 so callers can correctly backoff when all endpoints are exhausted
last429Err = statusErr{code: httpResp.StatusCode, msg: string(respBody)}
@@ -412,13 +745,21 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
}
// Handle 5xx server errors with exponential backoff retry
// Enhanced: Use retryConfig for consistent retry behavior
if httpResp.StatusCode >= 500 && httpResp.StatusCode < 600 {
respBody, _ := io.ReadAll(httpResp.Body)
_ = httpResp.Body.Close()
appendAPIResponseChunk(ctx, e.cfg, respBody)
if attempt < maxRetries {
// Exponential backoff: 1s, 2s, 4s... (max 30s)
retryCfg := defaultRetryConfig()
// Check if this specific 5xx code is retryable (502, 503, 504)
if isRetryableHTTPStatus(httpResp.StatusCode) && attempt < retryCfg.MaxRetries {
delay := calculateRetryDelay(attempt, retryCfg)
logRetryAttempt(attempt, retryCfg.MaxRetries, fmt.Sprintf("HTTP %d", httpResp.StatusCode), delay, endpointConfig.Name)
time.Sleep(delay)
continue
} else if attempt < maxRetries {
// Fallback for other 5xx errors (500, 501, etc.)
backoff := time.Duration(1<<attempt) * time.Second
if backoff > 30*time.Second {
backoff = 30 * time.Second
@@ -492,7 +833,10 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
// Check for SUSPENDED status - return immediately without retry
if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") {
log.Errorf("kiro: account is suspended, cannot proceed")
// Set long cooldown for suspended accounts
rateLimiter.CheckAndMarkSuspended(tokenKey, respBodyStr)
cooldownMgr.SetCooldown(tokenKey, kiroauth.LongCooldown, kiroauth.CooldownReasonSuspended)
log.Errorf("kiro: account is suspended, token %s set to cooldown for %v", tokenKey, kiroauth.LongCooldown)
return resp, statusErr{code: httpResp.StatusCode, msg: "account suspended: " + string(respBody)}
}
@@ -581,6 +925,10 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
appendAPIResponseChunk(ctx, e.cfg, []byte(content))
reporter.publish(ctx, usageInfo)
// Record success for rate limiting
rateLimiter.MarkTokenSuccess(tokenKey)
log.Debugf("kiro: request successful, token %s marked as success", tokenKey)
// Build response in Claude format for Kiro translator
// stopReason is extracted from upstream response by parseEventStream
kiroResponse := kiroclaude.BuildClaudeResponse(content, toolUses, req.Model, usageInfo, stopReason)
@@ -608,6 +956,24 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
return nil, fmt.Errorf("kiro: access token not found in auth")
}
// Rate limiting: get token key for tracking
tokenKey := getTokenKey(auth)
rateLimiter := kiroauth.GetGlobalRateLimiter()
cooldownMgr := kiroauth.GetGlobalCooldownManager()
// Check if token is in cooldown period
if cooldownMgr.IsInCooldown(tokenKey) {
remaining := cooldownMgr.GetRemainingCooldown(tokenKey)
reason := cooldownMgr.GetCooldownReason(tokenKey)
log.Warnf("kiro: token %s is in cooldown (reason: %s), remaining: %v", tokenKey, reason, remaining)
return nil, fmt.Errorf("kiro: token is in cooldown for %v (reason: %s)", remaining, reason)
}
// Wait for rate limiter before proceeding
log.Debugf("kiro: stream waiting for rate limiter for token %s", tokenKey)
rateLimiter.WaitForToken(tokenKey)
log.Debugf("kiro: stream rate limiter cleared for token %s", tokenKey)
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
defer reporter.trackFailure(ctx, &err)
@@ -640,7 +1006,7 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
// Execute stream with retry on 401/403 and 429 (quota exhausted)
// Note: currentOrigin and kiroPayload are built inside executeStreamWithRetry for each endpoint
return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, nil, body, from, reporter, "", kiroModelID, isAgentic, isChatOnly)
return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, nil, body, from, reporter, "", kiroModelID, isAgentic, isChatOnly, tokenKey)
}
// executeStreamWithRetry performs the streaming HTTP request with automatic retry on auth errors.
@@ -648,8 +1014,11 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
// - Amazon Q endpoint (CLI origin) uses Amazon Q Developer quota
// - CodeWhisperer endpoint (AI_EDITOR origin) uses Kiro IDE quota
// Also supports multi-endpoint fallback similar to Antigravity implementation.
func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, accessToken, profileArn string, kiroPayload, body []byte, from sdktranslator.Format, reporter *usageReporter, currentOrigin, kiroModelID string, isAgentic, isChatOnly bool) (<-chan cliproxyexecutor.StreamChunk, error) {
// tokenKey is used for rate limiting and cooldown tracking.
func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, accessToken, profileArn string, kiroPayload, body []byte, from sdktranslator.Format, reporter *usageReporter, currentOrigin, kiroModelID string, isAgentic, isChatOnly bool, tokenKey string) (<-chan cliproxyexecutor.StreamChunk, error) {
maxRetries := 2 // Allow retries for token refresh + endpoint fallback
rateLimiter := kiroauth.GetGlobalRateLimiter()
cooldownMgr := kiroauth.GetGlobalCooldownManager()
endpointConfigs := getKiroEndpointConfigs(auth)
var last429Err error
@@ -667,6 +1036,13 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
for attempt := 0; attempt <= maxRetries; attempt++ {
// Apply human-like delay before first streaming request (not on retries)
// This mimics natural user behavior patterns
// Note: Delay is NOT applied during streaming response - only before initial request
if attempt == 0 && endpointIdx == 0 {
kiroauth.ApplyHumanLikeDelay()
}
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(kiroPayload))
if err != nil {
return nil, err
@@ -677,20 +1053,9 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
// Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors)
httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
// Use different headers based on auth type
// IDC auth uses Kiro IDE style headers (from kiro2api)
// Other auth types use Amazon Q CLI style headers
if isIDCAuth(auth) {
httpReq.Header.Set("User-Agent", kiroIDEUserAgent)
httpReq.Header.Set("X-Amz-User-Agent", kiroIDEAmzUserAgent)
httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
log.Debugf("kiro: using Kiro IDE headers for IDC auth")
} else {
httpReq.Header.Set("User-Agent", kiroUserAgent)
httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroAgentModeVibe)
}
httpReq.Header.Set("x-amzn-codewhisperer-optout", "true")
// Apply dynamic fingerprint-based headers
applyDynamicFingerprint(httpReq, auth)
httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
@@ -721,10 +1086,20 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
AuthValue: authValue,
})
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
httpClient := newKiroHTTPClientWithPooling(ctx, e.cfg, auth, 0)
httpResp, err := httpClient.Do(httpReq)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
// Enhanced socket retry for streaming: Check if error is retryable (network timeout, connection reset, etc.)
retryCfg := defaultRetryConfig()
if isRetryableError(err) && attempt < retryCfg.MaxRetries {
delay := calculateRetryDelay(attempt, retryCfg)
logRetryAttempt(attempt, retryCfg.MaxRetries, fmt.Sprintf("stream socket error: %v", err), delay, endpointConfig.Name)
time.Sleep(delay)
continue
}
return nil, err
}
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
@@ -736,6 +1111,12 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
_ = httpResp.Body.Close()
appendAPIResponseChunk(ctx, e.cfg, respBody)
// Record failure and set cooldown for 429
rateLimiter.MarkTokenFailed(tokenKey)
cooldownDuration := kiroauth.CalculateCooldownFor429(attempt)
cooldownMgr.SetCooldown(tokenKey, cooldownDuration, kiroauth.CooldownReason429)
log.Warnf("kiro: stream rate limit hit (429), token %s set to cooldown for %v", tokenKey, cooldownDuration)
// Preserve last 429 so callers can correctly backoff when all endpoints are exhausted
last429Err = statusErr{code: httpResp.StatusCode, msg: string(respBody)}
@@ -747,13 +1128,21 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
}
// Handle 5xx server errors with exponential backoff retry
// Enhanced: Use retryConfig for consistent retry behavior
if httpResp.StatusCode >= 500 && httpResp.StatusCode < 600 {
respBody, _ := io.ReadAll(httpResp.Body)
_ = httpResp.Body.Close()
appendAPIResponseChunk(ctx, e.cfg, respBody)
if attempt < maxRetries {
// Exponential backoff: 1s, 2s, 4s... (max 30s)
retryCfg := defaultRetryConfig()
// Check if this specific 5xx code is retryable (502, 503, 504)
if isRetryableHTTPStatus(httpResp.StatusCode) && attempt < retryCfg.MaxRetries {
delay := calculateRetryDelay(attempt, retryCfg)
logRetryAttempt(attempt, retryCfg.MaxRetries, fmt.Sprintf("stream HTTP %d", httpResp.StatusCode), delay, endpointConfig.Name)
time.Sleep(delay)
continue
} else if attempt < maxRetries {
// Fallback for other 5xx errors (500, 501, etc.)
backoff := time.Duration(1<<attempt) * time.Second
if backoff > 30*time.Second {
backoff = 30 * time.Second
@@ -840,7 +1229,10 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
// Check for SUSPENDED status - return immediately without retry
if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") {
log.Errorf("kiro: account is suspended, cannot proceed")
// Set long cooldown for suspended accounts
rateLimiter.CheckAndMarkSuspended(tokenKey, respBodyStr)
cooldownMgr.SetCooldown(tokenKey, kiroauth.LongCooldown, kiroauth.CooldownReasonSuspended)
log.Errorf("kiro: stream account is suspended, token %s set to cooldown for %v", tokenKey, kiroauth.LongCooldown)
return nil, statusErr{code: httpResp.StatusCode, msg: "account suspended: " + string(respBody)}
}
@@ -890,6 +1282,11 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
out := make(chan cliproxyexecutor.StreamChunk)
// Record success immediately since connection was established successfully
// Streaming errors will be handled separately
rateLimiter.MarkTokenSuccess(tokenKey)
log.Debugf("kiro: stream request successful, token %s marked as success", tokenKey)
go func(resp *http.Response, thinkingEnabled bool) {
defer close(out)
defer func() {