mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-20 22:51:45 +00:00
feat: add Kiro OAuth web, rate limiter, metrics, fingerprint, background refresh and model converter
This commit is contained in:
@@ -7,13 +7,16 @@ import (
|
||||
"encoding/base64"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -53,9 +56,28 @@ const (
|
||||
kiroIDEUserAgent = "aws-sdk-js/1.0.18 ua/2.1 os/darwin#25.0.0 lang/js md/nodejs#20.16.0 api/codewhispererstreaming#1.0.18 m/E KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
|
||||
kiroIDEAmzUserAgent = "aws-sdk-js/1.0.18 KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
|
||||
kiroIDEAgentModeSpec = "spec"
|
||||
kiroAgentModeVibe = "vibe"
|
||||
|
||||
// Socket retry configuration constants (based on kiro2Api reference implementation)
|
||||
// Maximum number of retry attempts for socket/network errors
|
||||
kiroSocketMaxRetries = 3
|
||||
// Base delay between retry attempts (uses exponential backoff: delay * 2^attempt)
|
||||
kiroSocketBaseRetryDelay = 1 * time.Second
|
||||
// Maximum delay between retry attempts (cap for exponential backoff)
|
||||
kiroSocketMaxRetryDelay = 30 * time.Second
|
||||
// First token timeout for streaming responses (how long to wait for first response)
|
||||
kiroFirstTokenTimeout = 15 * time.Second
|
||||
// Streaming read timeout (how long to wait between chunks)
|
||||
kiroStreamingReadTimeout = 300 * time.Second
|
||||
)
|
||||
|
||||
// retryableHTTPStatusCodes defines HTTP status codes that are considered retryable.
// Based on kiro2Api reference: 502 (Bad Gateway), 503 (Service Unavailable), 504 (Gateway Timeout)
//
// Note: 500/501 are intentionally absent — they are handled by the separate
// generic 5xx backoff path in executeWithRetry rather than this fast-path set.
var retryableHTTPStatusCodes = map[int]bool{
	502: true, // Bad Gateway - upstream server error
	503: true, // Service Unavailable - server temporarily overloaded
	504: true, // Gateway Timeout - upstream server timeout
}
|
||||
|
||||
// Real-time usage estimation configuration
|
||||
// These control how often usage updates are sent during streaming
|
||||
var (
|
||||
@@ -63,6 +85,241 @@ var (
|
||||
usageUpdateTimeInterval = 15 * time.Second // Or every 15 seconds, whichever comes first
|
||||
)
|
||||
|
||||
// Global FingerprintManager for dynamic User-Agent generation per token.
// Each token gets a unique fingerprint on first use, which is cached for subsequent requests.
//
// Initialization is deferred to getGlobalFingerprintManager via sync.Once so the
// manager is only constructed when the first IDC-auth request needs it.
var (
	globalFingerprintManager     *kiroauth.FingerprintManager
	globalFingerprintManagerOnce sync.Once
)
|
||||
|
||||
// getGlobalFingerprintManager returns the global FingerprintManager instance
|
||||
func getGlobalFingerprintManager() *kiroauth.FingerprintManager {
|
||||
globalFingerprintManagerOnce.Do(func() {
|
||||
globalFingerprintManager = kiroauth.NewFingerprintManager()
|
||||
log.Infof("kiro: initialized global FingerprintManager for dynamic UA generation")
|
||||
})
|
||||
return globalFingerprintManager
|
||||
}
|
||||
|
||||
// retryConfig holds configuration for socket retry logic.
// Based on kiro2Api Python implementation patterns.
//
// A single instance is produced by defaultRetryConfig and consumed by
// isRetryableError (message patterns) and the executeWithRetry loops
// (attempt counts, backoff bounds).
type retryConfig struct {
	MaxRetries      int           // Maximum number of retry attempts
	BaseDelay       time.Duration // Base delay between retries (exponential backoff)
	MaxDelay        time.Duration // Maximum delay cap
	RetryableErrors []string      // List of retryable error patterns (substring-matched against the error message)
	RetryableStatus map[int]bool  // HTTP status codes to retry
	FirstTokenTmout time.Duration // Timeout for first token in streaming
	StreamReadTmout time.Duration // Timeout between stream chunks
}
|
||||
|
||||
// defaultRetryConfig returns the default retry configuration for Kiro socket operations.
|
||||
func defaultRetryConfig() retryConfig {
|
||||
return retryConfig{
|
||||
MaxRetries: kiroSocketMaxRetries,
|
||||
BaseDelay: kiroSocketBaseRetryDelay,
|
||||
MaxDelay: kiroSocketMaxRetryDelay,
|
||||
RetryableStatus: retryableHTTPStatusCodes,
|
||||
RetryableErrors: []string{
|
||||
"connection reset",
|
||||
"connection refused",
|
||||
"broken pipe",
|
||||
"EOF",
|
||||
"timeout",
|
||||
"temporary failure",
|
||||
"no such host",
|
||||
"network is unreachable",
|
||||
"i/o timeout",
|
||||
},
|
||||
FirstTokenTmout: kiroFirstTokenTimeout,
|
||||
StreamReadTmout: kiroStreamingReadTimeout,
|
||||
}
|
||||
}
|
||||
|
||||
// isRetryableError checks if an error is retryable based on error type and message.
|
||||
// Returns true for network timeouts, connection resets, and temporary failures.
|
||||
// Based on kiro2Api's retry logic patterns.
|
||||
func isRetryableError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check for context cancellation - not retryable
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check for net.Error (timeout, temporary)
|
||||
var netErr net.Error
|
||||
if errors.As(err, &netErr) {
|
||||
if netErr.Timeout() {
|
||||
log.Debugf("kiro: isRetryableError: network timeout detected")
|
||||
return true
|
||||
}
|
||||
// Note: Temporary() is deprecated but still useful for some error types
|
||||
}
|
||||
|
||||
// Check for specific syscall errors (connection reset, broken pipe, etc.)
|
||||
var syscallErr syscall.Errno
|
||||
if errors.As(err, &syscallErr) {
|
||||
switch syscallErr {
|
||||
case syscall.ECONNRESET: // Connection reset by peer
|
||||
log.Debugf("kiro: isRetryableError: ECONNRESET detected")
|
||||
return true
|
||||
case syscall.ECONNREFUSED: // Connection refused
|
||||
log.Debugf("kiro: isRetryableError: ECONNREFUSED detected")
|
||||
return true
|
||||
case syscall.EPIPE: // Broken pipe
|
||||
log.Debugf("kiro: isRetryableError: EPIPE (broken pipe) detected")
|
||||
return true
|
||||
case syscall.ETIMEDOUT: // Connection timed out
|
||||
log.Debugf("kiro: isRetryableError: ETIMEDOUT detected")
|
||||
return true
|
||||
case syscall.ENETUNREACH: // Network is unreachable
|
||||
log.Debugf("kiro: isRetryableError: ENETUNREACH detected")
|
||||
return true
|
||||
case syscall.EHOSTUNREACH: // No route to host
|
||||
log.Debugf("kiro: isRetryableError: EHOSTUNREACH detected")
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Check for net.OpError wrapping other errors
|
||||
var opErr *net.OpError
|
||||
if errors.As(err, &opErr) {
|
||||
log.Debugf("kiro: isRetryableError: net.OpError detected, op=%s", opErr.Op)
|
||||
// Recursively check the wrapped error
|
||||
if opErr.Err != nil {
|
||||
return isRetryableError(opErr.Err)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Check error message for retryable patterns
|
||||
errMsg := strings.ToLower(err.Error())
|
||||
cfg := defaultRetryConfig()
|
||||
for _, pattern := range cfg.RetryableErrors {
|
||||
if strings.Contains(errMsg, pattern) {
|
||||
log.Debugf("kiro: isRetryableError: pattern '%s' matched in error: %s", pattern, errMsg)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Check for EOF which may indicate connection was closed
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
log.Debugf("kiro: isRetryableError: EOF/UnexpectedEOF detected")
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// isRetryableHTTPStatus checks if an HTTP status code is retryable.
|
||||
// Based on kiro2Api: 502, 503, 504 are retryable server errors.
|
||||
func isRetryableHTTPStatus(statusCode int) bool {
|
||||
return retryableHTTPStatusCodes[statusCode]
|
||||
}
|
||||
|
||||
// calculateRetryDelay calculates the delay for the next retry attempt using exponential backoff.
// delay = min(baseDelay * 2^attempt, maxDelay)
// Adds ±30% jitter to prevent thundering herd.
//
// Thin delegate: the actual backoff/jitter math lives in
// kiroauth.ExponentialBackoffWithJitter; this wrapper only adapts retryConfig.
func calculateRetryDelay(attempt int, cfg retryConfig) time.Duration {
	return kiroauth.ExponentialBackoffWithJitter(attempt, cfg.BaseDelay, cfg.MaxDelay)
}
|
||||
|
||||
// logRetryAttempt logs a retry attempt with relevant context.
|
||||
func logRetryAttempt(attempt, maxRetries int, reason string, delay time.Duration, endpoint string) {
|
||||
log.Warnf("kiro: retry attempt %d/%d for %s, waiting %v before next attempt (endpoint: %s)",
|
||||
attempt+1, maxRetries, reason, delay, endpoint)
|
||||
}
|
||||
|
||||
// kiroHTTPClientPool provides a shared HTTP client with connection pooling for Kiro API.
// This reduces connection overhead and improves performance for concurrent requests.
// Based on kiro2Api's connection pooling pattern.
//
// The client is built lazily (sync.Once) by getKiroPooledHTTPClient and then
// shared by all non-proxied requests for the lifetime of the process.
var (
	kiroHTTPClientPool     *http.Client
	kiroHTTPClientPoolOnce sync.Once
)
|
||||
|
||||
// getKiroPooledHTTPClient returns a shared HTTP client with optimized connection pooling.
|
||||
// The client is lazily initialized on first use and reused across requests.
|
||||
// This is especially beneficial for:
|
||||
// - Reducing TCP handshake overhead
|
||||
// - Enabling HTTP/2 multiplexing
|
||||
// - Better handling of keep-alive connections
|
||||
func getKiroPooledHTTPClient() *http.Client {
|
||||
kiroHTTPClientPoolOnce.Do(func() {
|
||||
transport := &http.Transport{
|
||||
// Connection pool settings
|
||||
MaxIdleConns: 100, // Max idle connections across all hosts
|
||||
MaxIdleConnsPerHost: 20, // Max idle connections per host
|
||||
MaxConnsPerHost: 50, // Max total connections per host
|
||||
IdleConnTimeout: 90 * time.Second, // How long idle connections stay in pool
|
||||
|
||||
// Timeouts for connection establishment
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: 30 * time.Second, // TCP connection timeout
|
||||
KeepAlive: 30 * time.Second, // TCP keep-alive interval
|
||||
}).DialContext,
|
||||
|
||||
// TLS handshake timeout
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
|
||||
// Response header timeout
|
||||
ResponseHeaderTimeout: 30 * time.Second,
|
||||
|
||||
// Expect 100-continue timeout
|
||||
ExpectContinueTimeout: 1 * time.Second,
|
||||
|
||||
// Enable HTTP/2 when available
|
||||
ForceAttemptHTTP2: true,
|
||||
}
|
||||
|
||||
kiroHTTPClientPool = &http.Client{
|
||||
Transport: transport,
|
||||
// No global timeout - let individual requests set their own timeouts via context
|
||||
}
|
||||
|
||||
log.Debugf("kiro: initialized pooled HTTP client (MaxIdleConns=%d, MaxIdleConnsPerHost=%d, MaxConnsPerHost=%d)",
|
||||
transport.MaxIdleConns, transport.MaxIdleConnsPerHost, transport.MaxConnsPerHost)
|
||||
})
|
||||
|
||||
return kiroHTTPClientPool
|
||||
}
|
||||
|
||||
// newKiroHTTPClientWithPooling creates an HTTP client that uses connection pooling when appropriate.
|
||||
// It respects proxy configuration from auth or config, falling back to the pooled client.
|
||||
// This provides the best of both worlds: custom proxy support + connection reuse.
|
||||
func newKiroHTTPClientWithPooling(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client {
|
||||
// Check if a proxy is configured - if so, we need a custom client
|
||||
var proxyURL string
|
||||
if auth != nil {
|
||||
proxyURL = strings.TrimSpace(auth.ProxyURL)
|
||||
}
|
||||
if proxyURL == "" && cfg != nil {
|
||||
proxyURL = strings.TrimSpace(cfg.ProxyURL)
|
||||
}
|
||||
|
||||
// If proxy is configured, use the existing proxy-aware client (doesn't pool)
|
||||
if proxyURL != "" {
|
||||
log.Debugf("kiro: using proxy-aware HTTP client (proxy=%s)", proxyURL)
|
||||
return newProxyAwareHTTPClient(ctx, cfg, auth, timeout)
|
||||
}
|
||||
|
||||
// No proxy - use pooled client for better performance
|
||||
pooledClient := getKiroPooledHTTPClient()
|
||||
|
||||
// If timeout is specified, we need to wrap the pooled transport with timeout
|
||||
if timeout > 0 {
|
||||
return &http.Client{
|
||||
Transport: pooledClient.Transport,
|
||||
Timeout: timeout,
|
||||
}
|
||||
}
|
||||
|
||||
return pooledClient
|
||||
}
|
||||
|
||||
// kiroEndpointConfig bundles endpoint URL with its compatible Origin and AmzTarget values.
|
||||
// This solves the "triple mismatch" problem where different endpoints require matching
|
||||
// Origin and X-Amz-Target header values.
|
||||
@@ -99,7 +356,7 @@ var kiroEndpointConfigs = []kiroEndpointConfig{
|
||||
Name: "CodeWhisperer",
|
||||
},
|
||||
{
|
||||
URL: "https://q.us-east-1.amazonaws.com/generateAssistantResponse",
|
||||
URL: "https://q.us-east-1.amazonaws.com/",
|
||||
Origin: "CLI",
|
||||
AmzTarget: "AmazonQDeveloperStreamingService.SendMessage",
|
||||
Name: "AmazonQ",
|
||||
@@ -217,6 +474,29 @@ func NewKiroExecutor(cfg *config.Config) *KiroExecutor {
|
||||
// Identifier returns the unique identifier for this executor.
|
||||
func (e *KiroExecutor) Identifier() string { return "kiro" }
|
||||
|
||||
// applyDynamicFingerprint applies token-specific fingerprint headers to the request
|
||||
// For IDC auth, uses dynamic fingerprint-based User-Agent
|
||||
// For other auth types, uses static Amazon Q CLI style headers
|
||||
func applyDynamicFingerprint(req *http.Request, auth *cliproxyauth.Auth) {
|
||||
if isIDCAuth(auth) {
|
||||
// Get token-specific fingerprint for dynamic UA generation
|
||||
tokenKey := getTokenKey(auth)
|
||||
fp := getGlobalFingerprintManager().GetFingerprint(tokenKey)
|
||||
|
||||
// Use fingerprint-generated dynamic User-Agent
|
||||
req.Header.Set("User-Agent", fp.BuildUserAgent())
|
||||
req.Header.Set("X-Amz-User-Agent", fp.BuildAmzUserAgent())
|
||||
req.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
|
||||
|
||||
log.Debugf("kiro: using dynamic fingerprint for token %s (SDK:%s, OS:%s/%s, Kiro:%s)",
|
||||
tokenKey[:8]+"...", fp.SDKVersion, fp.OSType, fp.OSVersion, fp.KiroVersion)
|
||||
} else {
|
||||
// Use static Amazon Q CLI style headers for non-IDC auth
|
||||
req.Header.Set("User-Agent", kiroUserAgent)
|
||||
req.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
|
||||
}
|
||||
}
|
||||
|
||||
// PrepareRequest prepares the HTTP request before execution.
|
||||
func (e *KiroExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth) error {
|
||||
if req == nil {
|
||||
@@ -226,16 +506,10 @@ func (e *KiroExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth
|
||||
if strings.TrimSpace(accessToken) == "" {
|
||||
return statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
|
||||
}
|
||||
if isIDCAuth(auth) {
|
||||
req.Header.Set("User-Agent", kiroIDEUserAgent)
|
||||
req.Header.Set("X-Amz-User-Agent", kiroIDEAmzUserAgent)
|
||||
req.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
|
||||
} else {
|
||||
req.Header.Set("User-Agent", kiroUserAgent)
|
||||
req.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
|
||||
req.Header.Set("x-amzn-kiro-agent-mode", kiroAgentModeVibe)
|
||||
}
|
||||
req.Header.Set("x-amzn-codewhisperer-optout", "true")
|
||||
|
||||
// Apply dynamic fingerprint-based headers
|
||||
applyDynamicFingerprint(req, auth)
|
||||
|
||||
req.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
|
||||
req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
|
||||
req.Header.Set("Authorization", "Bearer "+accessToken)
|
||||
@@ -259,10 +533,23 @@ func (e *KiroExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth,
|
||||
if errPrepare := e.PrepareRequest(httpReq, auth); errPrepare != nil {
|
||||
return nil, errPrepare
|
||||
}
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpClient := newKiroHTTPClientWithPooling(ctx, e.cfg, auth, 0)
|
||||
return httpClient.Do(httpReq)
|
||||
}
|
||||
|
||||
// getTokenKey returns a unique key for rate limiting based on auth credentials.
|
||||
// Uses auth ID if available, otherwise falls back to a hash of the access token.
|
||||
func getTokenKey(auth *cliproxyauth.Auth) string {
|
||||
if auth != nil && auth.ID != "" {
|
||||
return auth.ID
|
||||
}
|
||||
accessToken, _ := kiroCredentials(auth)
|
||||
if len(accessToken) > 16 {
|
||||
return accessToken[:16]
|
||||
}
|
||||
return accessToken
|
||||
}
|
||||
|
||||
// Execute sends the request to Kiro API and returns the response.
|
||||
// Supports automatic token refresh on 401/403 errors.
|
||||
func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
|
||||
@@ -271,6 +558,24 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
return resp, fmt.Errorf("kiro: access token not found in auth")
|
||||
}
|
||||
|
||||
// Rate limiting: get token key for tracking
|
||||
tokenKey := getTokenKey(auth)
|
||||
rateLimiter := kiroauth.GetGlobalRateLimiter()
|
||||
cooldownMgr := kiroauth.GetGlobalCooldownManager()
|
||||
|
||||
// Check if token is in cooldown period
|
||||
if cooldownMgr.IsInCooldown(tokenKey) {
|
||||
remaining := cooldownMgr.GetRemainingCooldown(tokenKey)
|
||||
reason := cooldownMgr.GetCooldownReason(tokenKey)
|
||||
log.Warnf("kiro: token %s is in cooldown (reason: %s), remaining: %v", tokenKey, reason, remaining)
|
||||
return resp, fmt.Errorf("kiro: token is in cooldown for %v (reason: %s)", remaining, reason)
|
||||
}
|
||||
|
||||
// Wait for rate limiter before proceeding
|
||||
log.Debugf("kiro: waiting for rate limiter for token %s", tokenKey)
|
||||
rateLimiter.WaitForToken(tokenKey)
|
||||
log.Debugf("kiro: rate limiter cleared for token %s", tokenKey)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
@@ -303,7 +608,7 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
|
||||
// Execute with retry on 401/403 and 429 (quota exhausted)
|
||||
// Note: currentOrigin and kiroPayload are built inside executeWithRetry for each endpoint
|
||||
resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, nil, body, from, to, reporter, "", kiroModelID, isAgentic, isChatOnly)
|
||||
resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, nil, body, from, to, reporter, "", kiroModelID, isAgentic, isChatOnly, tokenKey)
|
||||
return resp, err
|
||||
}
|
||||
|
||||
@@ -312,9 +617,12 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
// - Amazon Q endpoint (CLI origin) uses Amazon Q Developer quota
|
||||
// - CodeWhisperer endpoint (AI_EDITOR origin) uses Kiro IDE quota
|
||||
// Also supports multi-endpoint fallback similar to Antigravity implementation.
|
||||
func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, accessToken, profileArn string, kiroPayload, body []byte, from, to sdktranslator.Format, reporter *usageReporter, currentOrigin, kiroModelID string, isAgentic, isChatOnly bool) (cliproxyexecutor.Response, error) {
|
||||
// tokenKey is used for rate limiting and cooldown tracking.
|
||||
func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, accessToken, profileArn string, kiroPayload, body []byte, from, to sdktranslator.Format, reporter *usageReporter, currentOrigin, kiroModelID string, isAgentic, isChatOnly bool, tokenKey string) (cliproxyexecutor.Response, error) {
|
||||
var resp cliproxyexecutor.Response
|
||||
maxRetries := 2 // Allow retries for token refresh + endpoint fallback
|
||||
rateLimiter := kiroauth.GetGlobalRateLimiter()
|
||||
cooldownMgr := kiroauth.GetGlobalCooldownManager()
|
||||
endpointConfigs := getKiroEndpointConfigs(auth)
|
||||
var last429Err error
|
||||
|
||||
@@ -332,6 +640,12 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
|
||||
|
||||
for attempt := 0; attempt <= maxRetries; attempt++ {
|
||||
// Apply human-like delay before first request (not on retries)
|
||||
// This mimics natural user behavior patterns
|
||||
if attempt == 0 && endpointIdx == 0 {
|
||||
kiroauth.ApplyHumanLikeDelay()
|
||||
}
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(kiroPayload))
|
||||
if err != nil {
|
||||
return resp, err
|
||||
@@ -342,20 +656,9 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
// Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors)
|
||||
httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
|
||||
|
||||
// Use different headers based on auth type
|
||||
// IDC auth uses Kiro IDE style headers (from kiro2api)
|
||||
// Other auth types use Amazon Q CLI style headers
|
||||
if isIDCAuth(auth) {
|
||||
httpReq.Header.Set("User-Agent", kiroIDEUserAgent)
|
||||
httpReq.Header.Set("X-Amz-User-Agent", kiroIDEAmzUserAgent)
|
||||
httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
|
||||
log.Debugf("kiro: using Kiro IDE headers for IDC auth")
|
||||
} else {
|
||||
httpReq.Header.Set("User-Agent", kiroUserAgent)
|
||||
httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
|
||||
httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroAgentModeVibe)
|
||||
}
|
||||
httpReq.Header.Set("x-amzn-codewhisperer-optout", "true")
|
||||
// Apply dynamic fingerprint-based headers
|
||||
applyDynamicFingerprint(httpReq, auth)
|
||||
|
||||
httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
|
||||
httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
|
||||
|
||||
@@ -386,10 +689,34 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
AuthValue: authValue,
|
||||
})
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 120*time.Second)
|
||||
httpClient := newKiroHTTPClientWithPooling(ctx, e.cfg, auth, 120*time.Second)
|
||||
httpResp, err := httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
// Check for context cancellation first - client disconnected, not a server error
|
||||
// Use 499 (Client Closed Request - nginx convention) instead of 500
|
||||
if errors.Is(err, context.Canceled) {
|
||||
log.Debugf("kiro: request canceled by client (context.Canceled)")
|
||||
return resp, statusErr{code: 499, msg: "client canceled request"}
|
||||
}
|
||||
|
||||
// Check for context deadline exceeded - request timed out
|
||||
// Return 504 Gateway Timeout instead of 500
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
log.Debugf("kiro: request timed out (context.DeadlineExceeded)")
|
||||
return resp, statusErr{code: http.StatusGatewayTimeout, msg: "upstream request timed out"}
|
||||
}
|
||||
|
||||
recordAPIResponseError(ctx, e.cfg, err)
|
||||
|
||||
// Enhanced socket retry: Check if error is retryable (network timeout, connection reset, etc.)
|
||||
retryCfg := defaultRetryConfig()
|
||||
if isRetryableError(err) && attempt < retryCfg.MaxRetries {
|
||||
delay := calculateRetryDelay(attempt, retryCfg)
|
||||
logRetryAttempt(attempt, retryCfg.MaxRetries, fmt.Sprintf("socket error: %v", err), delay, endpointConfig.Name)
|
||||
time.Sleep(delay)
|
||||
continue
|
||||
}
|
||||
|
||||
return resp, err
|
||||
}
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
@@ -401,6 +728,12 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
_ = httpResp.Body.Close()
|
||||
appendAPIResponseChunk(ctx, e.cfg, respBody)
|
||||
|
||||
// Record failure and set cooldown for 429
|
||||
rateLimiter.MarkTokenFailed(tokenKey)
|
||||
cooldownDuration := kiroauth.CalculateCooldownFor429(attempt)
|
||||
cooldownMgr.SetCooldown(tokenKey, cooldownDuration, kiroauth.CooldownReason429)
|
||||
log.Warnf("kiro: rate limit hit (429), token %s set to cooldown for %v", tokenKey, cooldownDuration)
|
||||
|
||||
// Preserve last 429 so callers can correctly backoff when all endpoints are exhausted
|
||||
last429Err = statusErr{code: httpResp.StatusCode, msg: string(respBody)}
|
||||
|
||||
@@ -412,13 +745,21 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
}
|
||||
|
||||
// Handle 5xx server errors with exponential backoff retry
|
||||
// Enhanced: Use retryConfig for consistent retry behavior
|
||||
if httpResp.StatusCode >= 500 && httpResp.StatusCode < 600 {
|
||||
respBody, _ := io.ReadAll(httpResp.Body)
|
||||
_ = httpResp.Body.Close()
|
||||
appendAPIResponseChunk(ctx, e.cfg, respBody)
|
||||
|
||||
if attempt < maxRetries {
|
||||
// Exponential backoff: 1s, 2s, 4s... (max 30s)
|
||||
retryCfg := defaultRetryConfig()
|
||||
// Check if this specific 5xx code is retryable (502, 503, 504)
|
||||
if isRetryableHTTPStatus(httpResp.StatusCode) && attempt < retryCfg.MaxRetries {
|
||||
delay := calculateRetryDelay(attempt, retryCfg)
|
||||
logRetryAttempt(attempt, retryCfg.MaxRetries, fmt.Sprintf("HTTP %d", httpResp.StatusCode), delay, endpointConfig.Name)
|
||||
time.Sleep(delay)
|
||||
continue
|
||||
} else if attempt < maxRetries {
|
||||
// Fallback for other 5xx errors (500, 501, etc.)
|
||||
backoff := time.Duration(1<<attempt) * time.Second
|
||||
if backoff > 30*time.Second {
|
||||
backoff = 30 * time.Second
|
||||
@@ -492,7 +833,10 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
|
||||
// Check for SUSPENDED status - return immediately without retry
|
||||
if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") {
|
||||
log.Errorf("kiro: account is suspended, cannot proceed")
|
||||
// Set long cooldown for suspended accounts
|
||||
rateLimiter.CheckAndMarkSuspended(tokenKey, respBodyStr)
|
||||
cooldownMgr.SetCooldown(tokenKey, kiroauth.LongCooldown, kiroauth.CooldownReasonSuspended)
|
||||
log.Errorf("kiro: account is suspended, token %s set to cooldown for %v", tokenKey, kiroauth.LongCooldown)
|
||||
return resp, statusErr{code: httpResp.StatusCode, msg: "account suspended: " + string(respBody)}
|
||||
}
|
||||
|
||||
@@ -581,6 +925,10 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
|
||||
appendAPIResponseChunk(ctx, e.cfg, []byte(content))
|
||||
reporter.publish(ctx, usageInfo)
|
||||
|
||||
// Record success for rate limiting
|
||||
rateLimiter.MarkTokenSuccess(tokenKey)
|
||||
log.Debugf("kiro: request successful, token %s marked as success", tokenKey)
|
||||
|
||||
// Build response in Claude format for Kiro translator
|
||||
// stopReason is extracted from upstream response by parseEventStream
|
||||
kiroResponse := kiroclaude.BuildClaudeResponse(content, toolUses, req.Model, usageInfo, stopReason)
|
||||
@@ -608,6 +956,24 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
return nil, fmt.Errorf("kiro: access token not found in auth")
|
||||
}
|
||||
|
||||
// Rate limiting: get token key for tracking
|
||||
tokenKey := getTokenKey(auth)
|
||||
rateLimiter := kiroauth.GetGlobalRateLimiter()
|
||||
cooldownMgr := kiroauth.GetGlobalCooldownManager()
|
||||
|
||||
// Check if token is in cooldown period
|
||||
if cooldownMgr.IsInCooldown(tokenKey) {
|
||||
remaining := cooldownMgr.GetRemainingCooldown(tokenKey)
|
||||
reason := cooldownMgr.GetCooldownReason(tokenKey)
|
||||
log.Warnf("kiro: token %s is in cooldown (reason: %s), remaining: %v", tokenKey, reason, remaining)
|
||||
return nil, fmt.Errorf("kiro: token is in cooldown for %v (reason: %s)", remaining, reason)
|
||||
}
|
||||
|
||||
// Wait for rate limiter before proceeding
|
||||
log.Debugf("kiro: stream waiting for rate limiter for token %s", tokenKey)
|
||||
rateLimiter.WaitForToken(tokenKey)
|
||||
log.Debugf("kiro: stream rate limiter cleared for token %s", tokenKey)
|
||||
|
||||
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
|
||||
defer reporter.trackFailure(ctx, &err)
|
||||
|
||||
@@ -640,7 +1006,7 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
|
||||
// Execute stream with retry on 401/403 and 429 (quota exhausted)
|
||||
// Note: currentOrigin and kiroPayload are built inside executeStreamWithRetry for each endpoint
|
||||
return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, nil, body, from, reporter, "", kiroModelID, isAgentic, isChatOnly)
|
||||
return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, nil, body, from, reporter, "", kiroModelID, isAgentic, isChatOnly, tokenKey)
|
||||
}
|
||||
|
||||
// executeStreamWithRetry performs the streaming HTTP request with automatic retry on auth errors.
|
||||
@@ -648,8 +1014,11 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
// - Amazon Q endpoint (CLI origin) uses Amazon Q Developer quota
|
||||
// - CodeWhisperer endpoint (AI_EDITOR origin) uses Kiro IDE quota
|
||||
// Also supports multi-endpoint fallback similar to Antigravity implementation.
|
||||
func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, accessToken, profileArn string, kiroPayload, body []byte, from sdktranslator.Format, reporter *usageReporter, currentOrigin, kiroModelID string, isAgentic, isChatOnly bool) (<-chan cliproxyexecutor.StreamChunk, error) {
|
||||
// tokenKey is used for rate limiting and cooldown tracking.
|
||||
func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, accessToken, profileArn string, kiroPayload, body []byte, from sdktranslator.Format, reporter *usageReporter, currentOrigin, kiroModelID string, isAgentic, isChatOnly bool, tokenKey string) (<-chan cliproxyexecutor.StreamChunk, error) {
|
||||
maxRetries := 2 // Allow retries for token refresh + endpoint fallback
|
||||
rateLimiter := kiroauth.GetGlobalRateLimiter()
|
||||
cooldownMgr := kiroauth.GetGlobalCooldownManager()
|
||||
endpointConfigs := getKiroEndpointConfigs(auth)
|
||||
var last429Err error
|
||||
|
||||
@@ -667,6 +1036,13 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
|
||||
|
||||
for attempt := 0; attempt <= maxRetries; attempt++ {
|
||||
// Apply human-like delay before first streaming request (not on retries)
|
||||
// This mimics natural user behavior patterns
|
||||
// Note: Delay is NOT applied during streaming response - only before initial request
|
||||
if attempt == 0 && endpointIdx == 0 {
|
||||
kiroauth.ApplyHumanLikeDelay()
|
||||
}
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(kiroPayload))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -677,20 +1053,9 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
// Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors)
|
||||
httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
|
||||
|
||||
// Use different headers based on auth type
|
||||
// IDC auth uses Kiro IDE style headers (from kiro2api)
|
||||
// Other auth types use Amazon Q CLI style headers
|
||||
if isIDCAuth(auth) {
|
||||
httpReq.Header.Set("User-Agent", kiroIDEUserAgent)
|
||||
httpReq.Header.Set("X-Amz-User-Agent", kiroIDEAmzUserAgent)
|
||||
httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
|
||||
log.Debugf("kiro: using Kiro IDE headers for IDC auth")
|
||||
} else {
|
||||
httpReq.Header.Set("User-Agent", kiroUserAgent)
|
||||
httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
|
||||
httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroAgentModeVibe)
|
||||
}
|
||||
httpReq.Header.Set("x-amzn-codewhisperer-optout", "true")
|
||||
// Apply dynamic fingerprint-based headers
|
||||
applyDynamicFingerprint(httpReq, auth)
|
||||
|
||||
httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
|
||||
httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
|
||||
|
||||
@@ -721,10 +1086,20 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
AuthValue: authValue,
|
||||
})
|
||||
|
||||
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
|
||||
httpClient := newKiroHTTPClientWithPooling(ctx, e.cfg, auth, 0)
|
||||
httpResp, err := httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, err)
|
||||
|
||||
// Enhanced socket retry for streaming: Check if error is retryable (network timeout, connection reset, etc.)
|
||||
retryCfg := defaultRetryConfig()
|
||||
if isRetryableError(err) && attempt < retryCfg.MaxRetries {
|
||||
delay := calculateRetryDelay(attempt, retryCfg)
|
||||
logRetryAttempt(attempt, retryCfg.MaxRetries, fmt.Sprintf("stream socket error: %v", err), delay, endpointConfig.Name)
|
||||
time.Sleep(delay)
|
||||
continue
|
||||
}
|
||||
|
||||
return nil, err
|
||||
}
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
@@ -736,6 +1111,12 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
_ = httpResp.Body.Close()
|
||||
appendAPIResponseChunk(ctx, e.cfg, respBody)
|
||||
|
||||
// Record failure and set cooldown for 429
|
||||
rateLimiter.MarkTokenFailed(tokenKey)
|
||||
cooldownDuration := kiroauth.CalculateCooldownFor429(attempt)
|
||||
cooldownMgr.SetCooldown(tokenKey, cooldownDuration, kiroauth.CooldownReason429)
|
||||
log.Warnf("kiro: stream rate limit hit (429), token %s set to cooldown for %v", tokenKey, cooldownDuration)
|
||||
|
||||
// Preserve last 429 so callers can correctly backoff when all endpoints are exhausted
|
||||
last429Err = statusErr{code: httpResp.StatusCode, msg: string(respBody)}
|
||||
|
||||
@@ -747,13 +1128,21 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
}
|
||||
|
||||
// Handle 5xx server errors with exponential backoff retry
|
||||
// Enhanced: Use retryConfig for consistent retry behavior
|
||||
if httpResp.StatusCode >= 500 && httpResp.StatusCode < 600 {
|
||||
respBody, _ := io.ReadAll(httpResp.Body)
|
||||
_ = httpResp.Body.Close()
|
||||
appendAPIResponseChunk(ctx, e.cfg, respBody)
|
||||
|
||||
if attempt < maxRetries {
|
||||
// Exponential backoff: 1s, 2s, 4s... (max 30s)
|
||||
retryCfg := defaultRetryConfig()
|
||||
// Check if this specific 5xx code is retryable (502, 503, 504)
|
||||
if isRetryableHTTPStatus(httpResp.StatusCode) && attempt < retryCfg.MaxRetries {
|
||||
delay := calculateRetryDelay(attempt, retryCfg)
|
||||
logRetryAttempt(attempt, retryCfg.MaxRetries, fmt.Sprintf("stream HTTP %d", httpResp.StatusCode), delay, endpointConfig.Name)
|
||||
time.Sleep(delay)
|
||||
continue
|
||||
} else if attempt < maxRetries {
|
||||
// Fallback for other 5xx errors (500, 501, etc.)
|
||||
backoff := time.Duration(1<<attempt) * time.Second
|
||||
if backoff > 30*time.Second {
|
||||
backoff = 30 * time.Second
|
||||
@@ -840,7 +1229,10 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
|
||||
// Check for SUSPENDED status - return immediately without retry
|
||||
if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") {
|
||||
log.Errorf("kiro: account is suspended, cannot proceed")
|
||||
// Set long cooldown for suspended accounts
|
||||
rateLimiter.CheckAndMarkSuspended(tokenKey, respBodyStr)
|
||||
cooldownMgr.SetCooldown(tokenKey, kiroauth.LongCooldown, kiroauth.CooldownReasonSuspended)
|
||||
log.Errorf("kiro: stream account is suspended, token %s set to cooldown for %v", tokenKey, kiroauth.LongCooldown)
|
||||
return nil, statusErr{code: httpResp.StatusCode, msg: "account suspended: " + string(respBody)}
|
||||
}
|
||||
|
||||
@@ -890,6 +1282,11 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
|
||||
|
||||
out := make(chan cliproxyexecutor.StreamChunk)
|
||||
|
||||
// Record success immediately since connection was established successfully
|
||||
// Streaming errors will be handled separately
|
||||
rateLimiter.MarkTokenSuccess(tokenKey)
|
||||
log.Debugf("kiro: stream request successful, token %s marked as success", tokenKey)
|
||||
|
||||
go func(resp *http.Response, thinkingEnabled bool) {
|
||||
defer close(out)
|
||||
defer func() {
|
||||
|
||||
Reference in New Issue
Block a user