Merge branch 'router-for-me:main' into main

fix(caching): ensure prompt-caching beta is always appended and add multi-turn cache control tests
chore(docs): add links to mainline repository in README files
2026-03-09 23:33:24 +00:00 · 2026-01-31 01:43:48 +08:00 · 2026-01-31 01:42:58 +08:00 · 2026-01-31 01:24:29 +08:00 · 2026-01-31 01:22:28 +08:00 · 2026-01-31 01:15:14 +08:00
28 changed files with 1765 additions and 179 deletions
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ mkdir -p ~/cli-proxy && cd ~/cli-proxy
 cat > docker-compose.yml << 'EOF'
 services:
  cli-proxy-api:
-    image: 17600006524/cli-proxy-api-plus:latest
+    image: eceasy/cli-proxy-api-plus:latest
    container_name: cli-proxy-api-plus
    ports:
      - "8317:8317"
@@ -64,7 +64,7 @@ services:
 EOF

 # Download example config
-curl -o config.yaml https://raw.githubusercontent.com/linlang781/CLIProxyAPIPlus/main/config.example.yaml
+curl -o config.yaml https://raw.githubusercontent.com/router-for-me/CLIProxyAPIPlus/main/config.example.yaml

 # Pull and start
 docker compose pull && docker compose up -d
@@ -93,7 +93,7 @@ docker compose pull && docker compose up -d

 This project only accepts pull requests that relate to third-party provider support. Any pull requests unrelated to third-party provider support will be rejected.

-If you need to submit any non-third-party provider changes, please open them against the mainline repository.
+If you need to submit any non-third-party provider changes, please open them against the [mainline](https://github.com/router-for-me/CLIProxyAPI) repository.

 ## License

--- a/README_CN.md
+++ b/README_CN.md
@@ -52,7 +52,7 @@ mkdir -p ~/cli-proxy && cd ~/cli-proxy
 cat > docker-compose.yml << 'EOF'
 services:
  cli-proxy-api:
-    image: 17600006524/cli-proxy-api-plus:latest
+    image: eceasy/cli-proxy-api-plus:latest
    container_name: cli-proxy-api-plus
    ports:
      - "8317:8317"
@@ -64,7 +64,7 @@ services:
 EOF

 # 下载示例配置
-curl -o config.yaml https://raw.githubusercontent.com/linlang781/CLIProxyAPIPlus/main/config.example.yaml
+curl -o config.yaml https://raw.githubusercontent.com/router-for-me/CLIProxyAPIPlus/main/config.example.yaml

 # 拉取并启动
 docker compose pull && docker compose up -d
@@ -93,7 +93,7 @@ docker compose pull && docker compose up -d

 该项目仅接受第三方供应商支持的 Pull Request。任何非第三方供应商支持的 Pull Request 都将被拒绝。

-如果需要提交任何非第三方供应商支持的 Pull Request，请提交到主线版本。
+如果需要提交任何非第三方供应商支持的 Pull Request，请提交到[主线](https://github.com/router-for-me/CLIProxyAPI)版本。

 ## 许可证

--- a/go.mod
+++ b/go.mod
@@ -13,6 +13,7 @@ require (
 	github.com/joho/godotenv v1.5.1
 	github.com/klauspost/compress v1.17.4
 	github.com/minio/minio-go/v7 v7.0.66
+	github.com/refraction-networking/utls v1.8.2
 	github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c
 	github.com/sirupsen/logrus v1.9.3
 	github.com/tidwall/gjson v1.18.0
--- a/go.sum
+++ b/go.sum
@@ -122,6 +122,8 @@ github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmd
 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/refraction-networking/utls v1.8.2 h1:j4Q1gJj0xngdeH+Ox/qND11aEfhpgoEvV+S9iJ2IdQo=
+github.com/refraction-networking/utls v1.8.2/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
 github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
 github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc=
--- a/internal/auth/claude/anthropic_auth.go
+++ b/internal/auth/claude/anthropic_auth.go
@@ -14,7 +14,6 @@ import (
 	"time"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 )

@@ -51,7 +50,8 @@ type ClaudeAuth struct {
 }

 // NewClaudeAuth creates a new Anthropic authentication service.
-// It initializes the HTTP client with proxy settings from the configuration.
+// It initializes the HTTP client with a custom TLS transport that uses Firefox
+// fingerprint to bypass Cloudflare's TLS fingerprinting on Anthropic domains.
 //
 // Parameters:
 //   - cfg: The application configuration containing proxy settings
@@ -59,8 +59,10 @@ type ClaudeAuth struct {
 // Returns:
 //   - *ClaudeAuth: A new Claude authentication service instance
 func NewClaudeAuth(cfg *config.Config) *ClaudeAuth {
+	// Use custom HTTP client with Firefox TLS fingerprint to bypass
+	// Cloudflare's bot detection on Anthropic domains
 	return &ClaudeAuth{
-		httpClient: util.SetProxy(&cfg.SDKConfig, &http.Client{}),
+		httpClient: NewAnthropicHttpClient(&cfg.SDKConfig),
 	}
 }

--- a/internal/auth/claude/utls_transport.go
+++ b/internal/auth/claude/utls_transport.go
@@ -0,0 +1,165 @@
+// Package claude provides authentication functionality for Anthropic's Claude API.
+// This file implements a custom HTTP transport using utls to bypass TLS fingerprinting.
+package claude
+
+import (
+	"net/http"
+	"net/url"
+	"strings"
+	"sync"
+
+	tls "github.com/refraction-networking/utls"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/net/http2"
+	"golang.org/x/net/proxy"
+)
+
+// utlsRoundTripper implements http.RoundTripper using utls with Firefox fingerprint
+// to bypass Cloudflare's TLS fingerprinting on Anthropic domains.
+type utlsRoundTripper struct {
+	// mu protects the connections map and pending map
+	mu sync.Mutex
+	// connections caches HTTP/2 client connections per host
+	connections map[string]*http2.ClientConn
+	// pending tracks hosts that are currently being connected to (prevents race condition)
+	pending map[string]*sync.Cond
+	// dialer is used to create network connections, supporting proxies
+	dialer proxy.Dialer
+}
+
+// newUtlsRoundTripper creates a new utls-based round tripper with optional proxy support
+func newUtlsRoundTripper(cfg *config.SDKConfig) *utlsRoundTripper {
+	var dialer proxy.Dialer = proxy.Direct
+	if cfg != nil && cfg.ProxyURL != "" {
+		proxyURL, err := url.Parse(cfg.ProxyURL)
+		if err != nil {
+			log.Errorf("failed to parse proxy URL %q: %v", cfg.ProxyURL, err)
+		} else {
+			pDialer, err := proxy.FromURL(proxyURL, proxy.Direct)
+			if err != nil {
+				log.Errorf("failed to create proxy dialer for %q: %v", cfg.ProxyURL, err)
+			} else {
+				dialer = pDialer
+			}
+		}
+	}
+
+	return &utlsRoundTripper{
+		connections: make(map[string]*http2.ClientConn),
+		pending:     make(map[string]*sync.Cond),
+		dialer:      dialer,
+	}
+}
+
+// getOrCreateConnection gets an existing connection or creates a new one.
+// It uses a per-host locking mechanism to prevent multiple goroutines from
+// creating connections to the same host simultaneously.
+func (t *utlsRoundTripper) getOrCreateConnection(host, addr string) (*http2.ClientConn, error) {
+	t.mu.Lock()
+
+	// Check if connection exists and is usable
+	if h2Conn, ok := t.connections[host]; ok && h2Conn.CanTakeNewRequest() {
+		t.mu.Unlock()
+		return h2Conn, nil
+	}
+
+	// Check if another goroutine is already creating a connection
+	if cond, ok := t.pending[host]; ok {
+		// Wait for the other goroutine to finish
+		cond.Wait()
+		// Check if connection is now available
+		if h2Conn, ok := t.connections[host]; ok && h2Conn.CanTakeNewRequest() {
+			t.mu.Unlock()
+			return h2Conn, nil
+		}
+		// Connection still not available, we'll create one
+	}
+
+	// Mark this host as pending
+	cond := sync.NewCond(&t.mu)
+	t.pending[host] = cond
+	t.mu.Unlock()
+
+	// Create connection outside the lock
+	h2Conn, err := t.createConnection(host, addr)
+
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	// Remove pending marker and wake up waiting goroutines
+	delete(t.pending, host)
+	cond.Broadcast()
+
+	if err != nil {
+		return nil, err
+	}
+
+	// Store the new connection
+	t.connections[host] = h2Conn
+	return h2Conn, nil
+}
+
+// createConnection creates a new HTTP/2 connection with Firefox TLS fingerprint
+func (t *utlsRoundTripper) createConnection(host, addr string) (*http2.ClientConn, error) {
+	conn, err := t.dialer.Dial("tcp", addr)
+	if err != nil {
+		return nil, err
+	}
+
+	tlsConfig := &tls.Config{ServerName: host}
+	tlsConn := tls.UClient(conn, tlsConfig, tls.HelloFirefox_Auto)
+
+	if err := tlsConn.Handshake(); err != nil {
+		conn.Close()
+		return nil, err
+	}
+
+	tr := &http2.Transport{}
+	h2Conn, err := tr.NewClientConn(tlsConn)
+	if err != nil {
+		tlsConn.Close()
+		return nil, err
+	}
+
+	return h2Conn, nil
+}
+
+// RoundTrip implements http.RoundTripper
+func (t *utlsRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
+	host := req.URL.Host
+	addr := host
+	if !strings.Contains(addr, ":") {
+		addr += ":443"
+	}
+
+	// Get hostname without port for TLS ServerName
+	hostname := req.URL.Hostname()
+
+	h2Conn, err := t.getOrCreateConnection(hostname, addr)
+	if err != nil {
+		return nil, err
+	}
+
+	resp, err := h2Conn.RoundTrip(req)
+	if err != nil {
+		// Connection failed, remove it from cache
+		t.mu.Lock()
+		if cached, ok := t.connections[hostname]; ok && cached == h2Conn {
+			delete(t.connections, hostname)
+		}
+		t.mu.Unlock()
+		return nil, err
+	}
+
+	return resp, nil
+}
+
+// NewAnthropicHttpClient creates an HTTP client that bypasses TLS fingerprinting
+// for Anthropic domains by using utls with Firefox fingerprint.
+// It accepts optional SDK configuration for proxy settings.
+func NewAnthropicHttpClient(cfg *config.SDKConfig) *http.Client {
+	return &http.Client{
+		Transport: newUtlsRoundTripper(cfg),
+	}
+}
--- a/internal/auth/kiro/aws.go
+++ b/internal/auth/kiro/aws.go
@@ -32,14 +32,17 @@ type KiroTokenData struct {
 	ProfileArn string `json:"profileArn"`
 	// ExpiresAt is the timestamp when the token expires
 	ExpiresAt string `json:"expiresAt"`
-	// AuthMethod indicates the authentication method used (e.g., "builder-id", "social")
+	// AuthMethod indicates the authentication method used (e.g., "builder-id", "social", "idc")
 	AuthMethod string `json:"authMethod"`
-	// Provider indicates the OAuth provider (e.g., "AWS", "Google")
+	// Provider indicates the OAuth provider (e.g., "AWS", "Google", "Enterprise")
 	Provider string `json:"provider"`
 	// ClientID is the OIDC client ID (needed for token refresh)
 	ClientID string `json:"clientId,omitempty"`
 	// ClientSecret is the OIDC client secret (needed for token refresh)
 	ClientSecret string `json:"clientSecret,omitempty"`
+	// ClientIDHash is the hash of client ID used to locate device registration file
+	// (Enterprise Kiro IDE stores clientId/clientSecret in ~/.aws/sso/cache/{clientIdHash}.json)
+	ClientIDHash string `json:"clientIdHash,omitempty"`
 	// Email is the user's email address (used for file naming)
 	Email string `json:"email,omitempty"`
 	// StartURL is the IDC/Identity Center start URL (only for IDC auth method)
@@ -169,6 +172,8 @@ func LoadKiroIDETokenWithRetry(maxAttempts int, baseDelay time.Duration) (*KiroT
 }

 // LoadKiroIDEToken loads token data from Kiro IDE's token file.
+// For Enterprise Kiro IDE (IDC auth), it also loads clientId and clientSecret
+// from the device registration file referenced by clientIdHash.
 func LoadKiroIDEToken() (*KiroTokenData, error) {
 	homeDir, err := os.UserHomeDir()
 	if err != nil {
@@ -193,18 +198,69 @@ func LoadKiroIDEToken() (*KiroTokenData, error) {
 	// Normalize AuthMethod to lowercase (Kiro IDE uses "IdC" but we expect "idc")
 	token.AuthMethod = strings.ToLower(token.AuthMethod)

+	// For Enterprise Kiro IDE (IDC auth), load clientId and clientSecret from device registration
+	// The device registration file is located at ~/.aws/sso/cache/{clientIdHash}.json
+	if token.ClientIDHash != "" && token.ClientID == "" {
+		if err := loadDeviceRegistration(homeDir, token.ClientIDHash, &token); err != nil {
+			// Log warning but don't fail - token might still work for some operations
+			fmt.Printf("warning: failed to load device registration for clientIdHash %s: %v\n", token.ClientIDHash, err)
+		}
+	}
+
 	return &token, nil
 }

+// loadDeviceRegistration loads clientId and clientSecret from the device registration file.
+// Enterprise Kiro IDE stores these in ~/.aws/sso/cache/{clientIdHash}.json
+func loadDeviceRegistration(homeDir, clientIDHash string, token *KiroTokenData) error {
+	if clientIDHash == "" {
+		return fmt.Errorf("clientIdHash is empty")
+	}
+
+	// Sanitize clientIdHash to prevent path traversal
+	if strings.Contains(clientIDHash, "/") || strings.Contains(clientIDHash, "\\") || strings.Contains(clientIDHash, "..") {
+		return fmt.Errorf("invalid clientIdHash: contains path separator")
+	}
+
+	deviceRegPath := filepath.Join(homeDir, ".aws", "sso", "cache", clientIDHash+".json")
+	data, err := os.ReadFile(deviceRegPath)
+	if err != nil {
+		return fmt.Errorf("failed to read device registration file (%s): %w", deviceRegPath, err)
+	}
+
+	// Device registration file structure
+	var deviceReg struct {
+		ClientID     string `json:"clientId"`
+		ClientSecret string `json:"clientSecret"`
+		ExpiresAt    string `json:"expiresAt"`
+	}
+
+	if err := json.Unmarshal(data, &deviceReg); err != nil {
+		return fmt.Errorf("failed to parse device registration: %w", err)
+	}
+
+	if deviceReg.ClientID == "" || deviceReg.ClientSecret == "" {
+		return fmt.Errorf("device registration missing clientId or clientSecret")
+	}
+
+	token.ClientID = deviceReg.ClientID
+	token.ClientSecret = deviceReg.ClientSecret
+
+	return nil
+}
+
 // LoadKiroTokenFromPath loads token data from a custom path.
 // This supports multiple accounts by allowing different token files.
+// For Enterprise Kiro IDE (IDC auth), it also loads clientId and clientSecret
+// from the device registration file referenced by clientIdHash.
 func LoadKiroTokenFromPath(tokenPath string) (*KiroTokenData, error) {
+	homeDir, err := os.UserHomeDir()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get home directory: %w", err)
+	}
+
 	// Expand ~ to home directory
 	if len(tokenPath) > 0 && tokenPath[0] == '~' {
-		homeDir, err := os.UserHomeDir()
-		if err != nil {
-			return nil, fmt.Errorf("failed to get home directory: %w", err)
-		}
 		tokenPath = filepath.Join(homeDir, tokenPath[1:])
 	}

@@ -225,6 +281,14 @@ func LoadKiroTokenFromPath(tokenPath string) (*KiroTokenData, error) {
 	// Normalize AuthMethod to lowercase (Kiro IDE uses "IdC" but we expect "idc")
 	token.AuthMethod = strings.ToLower(token.AuthMethod)

+	// For Enterprise Kiro IDE (IDC auth), load clientId and clientSecret from device registration
+	if token.ClientIDHash != "" && token.ClientID == "" {
+		if err := loadDeviceRegistration(homeDir, token.ClientIDHash, &token); err != nil {
+			// Log warning but don't fail - token might still work for some operations
+			fmt.Printf("warning: failed to load device registration for clientIdHash %s: %v\n", token.ClientIDHash, err)
+		}
+	}
+
 	return &token, nil
 }

--- a/internal/auth/kiro/background_refresh.go
+++ b/internal/auth/kiro/background_refresh.go
@@ -161,40 +161,59 @@ func (r *BackgroundRefresher) refreshBatch(ctx context.Context) {
 }

 func (r *BackgroundRefresher) refreshSingle(ctx context.Context, token *Token) {
-	var newTokenData *KiroTokenData
-	var err error
-
 	// Normalize auth method to lowercase for case-insensitive matching
 	authMethod := strings.ToLower(token.AuthMethod)

-	switch authMethod {
-	case "idc":
-		newTokenData, err = r.ssoClient.RefreshTokenWithRegion(
-			ctx,
-			token.ClientID,
-			token.ClientSecret,
-			token.RefreshToken,
-			token.Region,
-			token.StartURL,
-		)
-	case "builder-id":
-		newTokenData, err = r.ssoClient.RefreshToken(
-			ctx,
-			token.ClientID,
-			token.ClientSecret,
-			token.RefreshToken,
-		)
-	default:
-		newTokenData, err = r.oauth.RefreshToken(ctx, token.RefreshToken)
+	// Create refresh function based on auth method
+	refreshFunc := func(ctx context.Context) (*KiroTokenData, error) {
+		switch authMethod {
+		case "idc":
+			return r.ssoClient.RefreshTokenWithRegion(
+				ctx,
+				token.ClientID,
+				token.ClientSecret,
+				token.RefreshToken,
+				token.Region,
+				token.StartURL,
+			)
+		case "builder-id":
+			return r.ssoClient.RefreshToken(
+				ctx,
+				token.ClientID,
+				token.ClientSecret,
+				token.RefreshToken,
+			)
+		default:
+			return r.oauth.RefreshTokenWithFingerprint(ctx, token.RefreshToken, token.ID)
+		}
 	}

-	if err != nil {
-		log.Printf("failed to refresh token %s: %v", token.ID, err)
+	// Use graceful degradation for better reliability
+	result := RefreshWithGracefulDegradation(
+		ctx,
+		refreshFunc,
+		token.AccessToken,
+		token.ExpiresAt,
+	)
+
+	if result.Error != nil {
+		log.Printf("failed to refresh token %s: %v", token.ID, result.Error)
+		return
+	}
+
+	newTokenData := result.TokenData
+	if result.UsedFallback {
+		log.Printf("token %s: using existing token as fallback (refresh failed but token still valid)", token.ID)
+		// Don't update the token file if we're using fallback
+		// Just update LastVerified to prevent immediate re-check
+		token.LastVerified = time.Now()
 		return
 	}

 	token.AccessToken = newTokenData.AccessToken
-	token.RefreshToken = newTokenData.RefreshToken
+	if newTokenData.RefreshToken != "" {
+		token.RefreshToken = newTokenData.RefreshToken
+	}
 	token.LastVerified = time.Now()

 	if newTokenData.ExpiresAt != "" {
--- a/internal/auth/kiro/oauth.go
+++ b/internal/auth/kiro/oauth.go
@@ -190,7 +190,7 @@ func (o *KiroOAuth) exchangeCodeForToken(ctx context.Context, code, codeVerifier
 	}

 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("User-Agent", "cli-proxy-api/1.0.0")
+	req.Header.Set("User-Agent", "KiroIDE-0.7.45-cli-proxy-api")

 	resp, err := o.httpClient.Do(req)
 	if err != nil {
@@ -232,7 +232,14 @@ func (o *KiroOAuth) exchangeCodeForToken(ctx context.Context, code, codeVerifier
 }

 // RefreshToken refreshes an expired access token.
+// Uses KiroIDE-style User-Agent to match official Kiro IDE behavior.
 func (o *KiroOAuth) RefreshToken(ctx context.Context, refreshToken string) (*KiroTokenData, error) {
+	return o.RefreshTokenWithFingerprint(ctx, refreshToken, "")
+}
+
+// RefreshTokenWithFingerprint refreshes an expired access token with a specific fingerprint.
+// tokenKey is used to generate a consistent fingerprint for the token.
+func (o *KiroOAuth) RefreshTokenWithFingerprint(ctx context.Context, refreshToken, tokenKey string) (*KiroTokenData, error) {
 	payload := map[string]string{
 		"refreshToken": refreshToken,
 	}
@@ -249,7 +256,11 @@ func (o *KiroOAuth) RefreshToken(ctx context.Context, refreshToken string) (*Kir
 	}

 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("User-Agent", "cli-proxy-api/1.0.0")
+
+	// Use KiroIDE-style User-Agent to match official Kiro IDE behavior
+	// This helps avoid 403 errors from server-side User-Agent validation
+	userAgent := buildKiroUserAgent(tokenKey)
+	req.Header.Set("User-Agent", userAgent)

 	resp, err := o.httpClient.Do(req)
 	if err != nil {
@@ -264,7 +275,7 @@ func (o *KiroOAuth) RefreshToken(ctx context.Context, refreshToken string) (*Kir

 	if resp.StatusCode != http.StatusOK {
 		log.Debugf("token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
-		return nil, fmt.Errorf("token refresh failed (status %d)", resp.StatusCode)
+		return nil, fmt.Errorf("token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
 	}

 	var tokenResp KiroTokenResponse
@@ -290,6 +301,19 @@ func (o *KiroOAuth) RefreshToken(ctx context.Context, refreshToken string) (*Kir
 	}, nil
 }

+// buildKiroUserAgent builds a KiroIDE-style User-Agent string.
+// If tokenKey is provided, uses fingerprint manager for consistent fingerprint.
+// Otherwise generates a simple KiroIDE User-Agent.
+func buildKiroUserAgent(tokenKey string) string {
+	if tokenKey != "" {
+		fm := NewFingerprintManager()
+		fp := fm.GetFingerprint(tokenKey)
+		return fmt.Sprintf("KiroIDE-%s-%s", fp.KiroVersion, fp.KiroHash[:16])
+	}
+	// Default KiroIDE User-Agent matching kiro-openai-gateway format
+	return "KiroIDE-0.7.45-cli-proxy-api"
+}
+
 // LoginWithGoogle performs OAuth login with Google using Kiro's social auth.
 // This uses a custom protocol handler (kiro://) to receive the callback.
 func (o *KiroOAuth) LoginWithGoogle(ctx context.Context) (*KiroTokenData, error) {
--- a/internal/auth/kiro/rate_limiter.go
+++ b/internal/auth/kiro/rate_limiter.go
@@ -9,14 +9,14 @@ import (
 )

 const (
-	DefaultMinTokenInterval  = 10 * time.Second
-	DefaultMaxTokenInterval  = 30 * time.Second
+	DefaultMinTokenInterval  = 1 * time.Second
+	DefaultMaxTokenInterval  = 2 * time.Second
 	DefaultDailyMaxRequests  = 500
 	DefaultJitterPercent     = 0.3
-	DefaultBackoffBase       = 2 * time.Minute
-	DefaultBackoffMax        = 60 * time.Minute
-	DefaultBackoffMultiplier = 2.0
-	DefaultSuspendCooldown   = 24 * time.Hour
+	DefaultBackoffBase       = 30 * time.Second
+	DefaultBackoffMax        = 5 * time.Minute
+	DefaultBackoffMultiplier = 1.5
+	DefaultSuspendCooldown   = 1 * time.Hour
 )

 // TokenState Token 状态
--- a/internal/auth/kiro/refresh_utils.go
+++ b/internal/auth/kiro/refresh_utils.go
@@ -0,0 +1,159 @@
+// Package kiro provides refresh utilities for Kiro token management.
+package kiro
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// RefreshResult contains the result of a token refresh attempt.
+type RefreshResult struct {
+	TokenData    *KiroTokenData
+	Error        error
+	UsedFallback bool // True if we used the existing token as fallback
+}
+
+// RefreshWithGracefulDegradation attempts to refresh a token with graceful degradation.
+// If refresh fails but the existing access token is still valid, it returns the existing token.
+// This matches kiro-openai-gateway's behavior for better reliability.
+//
+// Parameters:
+//   - ctx: Context for the request
+//   - refreshFunc: Function to perform the actual refresh
+//   - existingAccessToken: Current access token (for fallback)
+//   - expiresAt: Expiration time of the existing token
+//
+// Returns:
+//   - RefreshResult containing the new or existing token data
+func RefreshWithGracefulDegradation(
+	ctx context.Context,
+	refreshFunc func(ctx context.Context) (*KiroTokenData, error),
+	existingAccessToken string,
+	expiresAt time.Time,
+) RefreshResult {
+	// Try to refresh the token
+	newTokenData, err := refreshFunc(ctx)
+	if err == nil {
+		return RefreshResult{
+			TokenData:    newTokenData,
+			Error:        nil,
+			UsedFallback: false,
+		}
+	}
+
+	// Refresh failed - check if we can use the existing token
+	log.Warnf("kiro: token refresh failed: %v", err)
+
+	// Check if existing token is still valid (not expired)
+	if existingAccessToken != "" && time.Now().Before(expiresAt) {
+		remainingTime := time.Until(expiresAt)
+		log.Warnf("kiro: using existing access token (expires in %v). Will retry refresh later.", remainingTime.Round(time.Second))
+
+		return RefreshResult{
+			TokenData: &KiroTokenData{
+				AccessToken: existingAccessToken,
+				ExpiresAt:   expiresAt.Format(time.RFC3339),
+			},
+			Error:        nil,
+			UsedFallback: true,
+		}
+	}
+
+	// Token is expired and refresh failed - return the error
+	return RefreshResult{
+		TokenData:    nil,
+		Error:        fmt.Errorf("token refresh failed and existing token is expired: %w", err),
+		UsedFallback: false,
+	}
+}
+
+// IsTokenExpiringSoon checks if a token is expiring within the given threshold.
+// Default threshold is 5 minutes if not specified.
+func IsTokenExpiringSoon(expiresAt time.Time, threshold time.Duration) bool {
+	if threshold == 0 {
+		threshold = 5 * time.Minute
+	}
+	return time.Now().Add(threshold).After(expiresAt)
+}
+
+// IsTokenExpired checks if a token has already expired.
+func IsTokenExpired(expiresAt time.Time) bool {
+	return time.Now().After(expiresAt)
+}
+
+// ParseExpiresAt parses an expiration time string in RFC3339 format.
+// Returns zero time if parsing fails.
+func ParseExpiresAt(expiresAtStr string) time.Time {
+	if expiresAtStr == "" {
+		return time.Time{}
+	}
+	t, err := time.Parse(time.RFC3339, expiresAtStr)
+	if err != nil {
+		log.Debugf("kiro: failed to parse expiresAt '%s': %v", expiresAtStr, err)
+		return time.Time{}
+	}
+	return t
+}
+
+// RefreshConfig contains configuration for token refresh behavior.
+type RefreshConfig struct {
+	// MaxRetries is the maximum number of refresh attempts (default: 1)
+	MaxRetries int
+	// RetryDelay is the delay between retry attempts (default: 1 second)
+	RetryDelay time.Duration
+	// RefreshThreshold is how early to refresh before expiration (default: 5 minutes)
+	RefreshThreshold time.Duration
+	// EnableGracefulDegradation allows using existing token if refresh fails (default: true)
+	EnableGracefulDegradation bool
+}
+
+// DefaultRefreshConfig returns the default refresh configuration.
+func DefaultRefreshConfig() RefreshConfig {
+	return RefreshConfig{
+		MaxRetries:                1,
+		RetryDelay:                time.Second,
+		RefreshThreshold:          5 * time.Minute,
+		EnableGracefulDegradation: true,
+	}
+}
+
+// RefreshWithRetry attempts to refresh a token with retry logic.
+func RefreshWithRetry(
+	ctx context.Context,
+	refreshFunc func(ctx context.Context) (*KiroTokenData, error),
+	config RefreshConfig,
+) (*KiroTokenData, error) {
+	var lastErr error
+
+	maxAttempts := config.MaxRetries + 1
+	if maxAttempts < 1 {
+		maxAttempts = 1
+	}
+
+	for attempt := 1; attempt <= maxAttempts; attempt++ {
+		tokenData, err := refreshFunc(ctx)
+		if err == nil {
+			if attempt > 1 {
+				log.Infof("kiro: token refresh succeeded on attempt %d", attempt)
+			}
+			return tokenData, nil
+		}
+
+		lastErr = err
+		log.Warnf("kiro: token refresh attempt %d/%d failed: %v", attempt, maxAttempts, err)
+
+		// Don't sleep after the last attempt
+		if attempt < maxAttempts {
+			select {
+			case <-ctx.Done():
+				return nil, ctx.Err()
+			case <-time.After(config.RetryDelay):
+			}
+		}
+	}
+
+	return nil, fmt.Errorf("token refresh failed after %d attempts: %w", maxAttempts, lastErr)
+}
--- a/internal/auth/kiro/social_auth.go
+++ b/internal/auth/kiro/social_auth.go
@@ -229,7 +229,7 @@ func (c *SocialAuthClient) CreateToken(ctx context.Context, req *CreateTokenRequ
 	}

 	httpReq.Header.Set("Content-Type", "application/json")
-	httpReq.Header.Set("User-Agent", "cli-proxy-api/1.0.0")
+	httpReq.Header.Set("User-Agent", "KiroIDE-0.7.45-cli-proxy-api")

 	resp, err := c.httpClient.Do(httpReq)
 	if err != nil {
--- a/internal/auth/kiro/sso_oidc.go
+++ b/internal/auth/kiro/sso_oidc.go
@@ -684,6 +684,7 @@ func (c *SSOOIDCClient) CreateToken(ctx context.Context, clientID, clientSecret,
 }

 // RefreshToken refreshes an access token using the refresh token.
+// Includes retry logic and improved error handling for better reliability.
 func (c *SSOOIDCClient) RefreshToken(ctx context.Context, clientID, clientSecret, refreshToken string) (*KiroTokenData, error) {
 	payload := map[string]string{
 		"clientId":     clientID,
@@ -701,8 +702,13 @@ func (c *SSOOIDCClient) RefreshToken(ctx context.Context, clientID, clientSecret
 	if err != nil {
 		return nil, err
 	}
+
+	// Set headers matching Kiro IDE behavior for better compatibility
 	req.Header.Set("Content-Type", "application/json")
-	req.Header.Set("User-Agent", kiroUserAgent)
+	req.Header.Set("Host", "oidc.us-east-1.amazonaws.com")
+	req.Header.Set("x-amz-user-agent", idcAmzUserAgent)
+	req.Header.Set("User-Agent", "node")
+	req.Header.Set("Accept", "*/*")

 	resp, err := c.httpClient.Do(req)
 	if err != nil {
@@ -716,8 +722,8 @@ func (c *SSOOIDCClient) RefreshToken(ctx context.Context, clientID, clientSecret
 	}

 	if resp.StatusCode != http.StatusOK {
-		log.Debugf("token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
-		return nil, fmt.Errorf("token refresh failed (status %d)", resp.StatusCode)
+		log.Warnf("token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
+		return nil, fmt.Errorf("token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
 	}

 	var result CreateTokenResponse
--- a/internal/runtime/executor/caching_verify_test.go
+++ b/internal/runtime/executor/caching_verify_test.go
@@ -0,0 +1,258 @@
+package executor
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/tidwall/gjson"
+)
+
+func TestEnsureCacheControl(t *testing.T) {
+	// Test case 1: System prompt as string
+	t.Run("String System Prompt", func(t *testing.T) {
+		input := []byte(`{"model": "claude-3-5-sonnet", "system": "This is a long system prompt", "messages": []}`)
+		output := ensureCacheControl(input)
+
+		res := gjson.GetBytes(output, "system.0.cache_control.type")
+		if res.String() != "ephemeral" {
+			t.Errorf("cache_control not found in system string. Output: %s", string(output))
+		}
+	})
+
+	// Test case 2: System prompt as array
+	t.Run("Array System Prompt", func(t *testing.T) {
+		input := []byte(`{"model": "claude-3-5-sonnet", "system": [{"type": "text", "text": "Part 1"}, {"type": "text", "text": "Part 2"}], "messages": []}`)
+		output := ensureCacheControl(input)
+
+		// cache_control should only be on the LAST element
+		res0 := gjson.GetBytes(output, "system.0.cache_control")
+		res1 := gjson.GetBytes(output, "system.1.cache_control.type")
+
+		if res0.Exists() {
+			t.Errorf("cache_control should NOT be on the first element")
+		}
+		if res1.String() != "ephemeral" {
+			t.Errorf("cache_control not found on last system element. Output: %s", string(output))
+		}
+	})
+
+	// Test case 3: Tools are cached
+	t.Run("Tools Caching", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-3-5-sonnet",
+			"tools": [
+				{"name": "tool1", "description": "First tool", "input_schema": {"type": "object"}},
+				{"name": "tool2", "description": "Second tool", "input_schema": {"type": "object"}}
+			],
+			"system": "System prompt",
+			"messages": []
+		}`)
+		output := ensureCacheControl(input)
+
+		// cache_control should only be on the LAST tool
+		tool0Cache := gjson.GetBytes(output, "tools.0.cache_control")
+		tool1Cache := gjson.GetBytes(output, "tools.1.cache_control.type")
+
+		if tool0Cache.Exists() {
+			t.Errorf("cache_control should NOT be on the first tool")
+		}
+		if tool1Cache.String() != "ephemeral" {
+			t.Errorf("cache_control not found on last tool. Output: %s", string(output))
+		}
+
+		// System should also have cache_control
+		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
+		if systemCache.String() != "ephemeral" {
+			t.Errorf("cache_control not found in system. Output: %s", string(output))
+		}
+	})
+
+	// Test case 4: Tools and system are INDEPENDENT breakpoints
+	// Per Anthropic docs: Up to 4 breakpoints allowed, tools and system are cached separately
+	t.Run("Independent Cache Breakpoints", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-3-5-sonnet",
+			"tools": [
+				{"name": "tool1", "description": "First tool", "input_schema": {"type": "object"}, "cache_control": {"type": "ephemeral"}}
+			],
+			"system": [{"type": "text", "text": "System"}],
+			"messages": []
+		}`)
+		output := ensureCacheControl(input)
+
+		// Tool already has cache_control - should not be changed
+		tool0Cache := gjson.GetBytes(output, "tools.0.cache_control.type")
+		if tool0Cache.String() != "ephemeral" {
+			t.Errorf("existing cache_control was incorrectly removed")
+		}
+
+		// System SHOULD get cache_control because it is an INDEPENDENT breakpoint
+		// Tools and system are separate cache levels in the hierarchy
+		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
+		if systemCache.String() != "ephemeral" {
+			t.Errorf("system should have its own cache_control breakpoint (independent of tools)")
+		}
+	})
+
+	// Test case 5: Only tools, no system
+	t.Run("Only Tools No System", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-3-5-sonnet",
+			"tools": [
+				{"name": "tool1", "description": "Tool", "input_schema": {"type": "object"}}
+			],
+			"messages": [{"role": "user", "content": "Hi"}]
+		}`)
+		output := ensureCacheControl(input)
+
+		toolCache := gjson.GetBytes(output, "tools.0.cache_control.type")
+		if toolCache.String() != "ephemeral" {
+			t.Errorf("cache_control not found on tool. Output: %s", string(output))
+		}
+	})
+
+	// Test case 6: Many tools (Claude Code scenario)
+	t.Run("Many Tools (Claude Code Scenario)", func(t *testing.T) {
+		// Simulate Claude Code with many tools
+		toolsJSON := `[`
+		for i := 0; i < 50; i++ {
+			if i > 0 {
+				toolsJSON += ","
+			}
+			toolsJSON += fmt.Sprintf(`{"name": "tool%d", "description": "Tool %d", "input_schema": {"type": "object"}}`, i, i)
+		}
+		toolsJSON += `]`
+
+		input := []byte(fmt.Sprintf(`{
+			"model": "claude-3-5-sonnet",
+			"tools": %s,
+			"system": [{"type": "text", "text": "You are Claude Code"}],
+			"messages": [{"role": "user", "content": "Hello"}]
+		}`, toolsJSON))
+
+		output := ensureCacheControl(input)
+
+		// Only the last tool (index 49) should have cache_control
+		for i := 0; i < 49; i++ {
+			path := fmt.Sprintf("tools.%d.cache_control", i)
+			if gjson.GetBytes(output, path).Exists() {
+				t.Errorf("tool %d should NOT have cache_control", i)
+			}
+		}
+
+		lastToolCache := gjson.GetBytes(output, "tools.49.cache_control.type")
+		if lastToolCache.String() != "ephemeral" {
+			t.Errorf("last tool (49) should have cache_control")
+		}
+
+		// System should also have cache_control
+		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
+		if systemCache.String() != "ephemeral" {
+			t.Errorf("system should have cache_control")
+		}
+
+		t.Log("test passed: 50 tools - cache_control only on last tool")
+	})
+
+	// Test case 7: Empty tools array
+	t.Run("Empty Tools Array", func(t *testing.T) {
+		input := []byte(`{"model": "claude-3-5-sonnet", "tools": [], "system": "Test", "messages": []}`)
+		output := ensureCacheControl(input)
+
+		// System should still get cache_control
+		systemCache := gjson.GetBytes(output, "system.0.cache_control.type")
+		if systemCache.String() != "ephemeral" {
+			t.Errorf("system should have cache_control even with empty tools array")
+		}
+	})
+
+	// Test case 8: Messages caching for multi-turn (second-to-last user)
+	t.Run("Messages Caching Second-To-Last User", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-3-5-sonnet",
+			"messages": [
+				{"role": "user", "content": "First user"},
+				{"role": "assistant", "content": "Assistant reply"},
+				{"role": "user", "content": "Second user"},
+				{"role": "assistant", "content": "Assistant reply 2"},
+				{"role": "user", "content": "Third user"}
+			]
+		}`)
+		output := ensureCacheControl(input)
+
+		cacheType := gjson.GetBytes(output, "messages.2.content.0.cache_control.type")
+		if cacheType.String() != "ephemeral" {
+			t.Errorf("cache_control not found on second-to-last user turn. Output: %s", string(output))
+		}
+
+		lastUserCache := gjson.GetBytes(output, "messages.4.content.0.cache_control")
+		if lastUserCache.Exists() {
+			t.Errorf("last user turn should NOT have cache_control")
+		}
+	})
+
+	// Test case 9: Existing message cache_control should skip injection
+	t.Run("Messages Skip When Cache Control Exists", func(t *testing.T) {
+		input := []byte(`{
+			"model": "claude-3-5-sonnet",
+			"messages": [
+				{"role": "user", "content": [{"type": "text", "text": "First user"}]},
+				{"role": "assistant", "content": [{"type": "text", "text": "Assistant reply", "cache_control": {"type": "ephemeral"}}]},
+				{"role": "user", "content": [{"type": "text", "text": "Second user"}]}
+			]
+		}`)
+		output := ensureCacheControl(input)
+
+		userCache := gjson.GetBytes(output, "messages.0.content.0.cache_control")
+		if userCache.Exists() {
+			t.Errorf("cache_control should NOT be injected when a message already has cache_control")
+		}
+
+		existingCache := gjson.GetBytes(output, "messages.1.content.0.cache_control.type")
+		if existingCache.String() != "ephemeral" {
+			t.Errorf("existing cache_control should be preserved. Output: %s", string(output))
+		}
+	})
+}
+
+// TestCacheControlOrder verifies the correct order: tools -> system -> messages
+func TestCacheControlOrder(t *testing.T) {
+	input := []byte(`{
+		"model": "claude-sonnet-4",
+		"tools": [
+			{"name": "Read", "description": "Read file", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}}},
+			{"name": "Write", "description": "Write file", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}}}
+		],
+		"system": [
+			{"type": "text", "text": "You are Claude Code, Anthropic's official CLI for Claude."},
+			{"type": "text", "text": "Additional instructions here..."}
+		],
+		"messages": [
+			{"role": "user", "content": "Hello"}
+		]
+	}`)
+
+	output := ensureCacheControl(input)
+
+	// 1. Last tool has cache_control
+	if gjson.GetBytes(output, "tools.1.cache_control.type").String() != "ephemeral" {
+		t.Error("last tool should have cache_control")
+	}
+
+	// 2. First tool has NO cache_control
+	if gjson.GetBytes(output, "tools.0.cache_control").Exists() {
+		t.Error("first tool should NOT have cache_control")
+	}
+
+	// 3. Last system element has cache_control
+	if gjson.GetBytes(output, "system.1.cache_control.type").String() != "ephemeral" {
+		t.Error("last system element should have cache_control")
+	}
+
+	// 4. First system element has NO cache_control
+	if gjson.GetBytes(output, "system.0.cache_control").Exists() {
+		t.Error("first system element should NOT have cache_control")
+	}
+
+	t.Log("cache order correct: tools -> system")
+}
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -120,6 +120,9 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)

+	// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
+	body = ensureCacheControl(body)
+
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -252,6 +255,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)

+	// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
+	body = ensureCacheControl(body)
+
 	// Extract betas from body and convert to header
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
@@ -636,13 +642,17 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 		ginHeaders = ginCtx.Request.Header
 	}

-	baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
+	promptCachingBeta := "prompt-caching-2024-07-31"
+	baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14," + promptCachingBeta
 	if val := strings.TrimSpace(ginHeaders.Get("Anthropic-Beta")); val != "" {
 		baseBetas = val
 		if !strings.Contains(val, "oauth") {
 			baseBetas += ",oauth-2025-04-20"
 		}
 	}
+	if !strings.Contains(baseBetas, promptCachingBeta) {
+		baseBetas += "," + promptCachingBeta
+	}

 	// Merge extra betas from request body
 	if len(extraBetas) > 0 {
@@ -990,3 +1000,214 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A

 	return payload
 }
+
+// ensureCacheControl injects cache_control breakpoints into the payload for optimal prompt caching.
+// According to Anthropic's documentation, cache prefixes are created in order: tools -> system -> messages.
+// This function adds cache_control to:
+// 1. The LAST tool in the tools array (caches all tool definitions)
+// 2. The LAST element in the system array (caches system prompt)
+// 3. The SECOND-TO-LAST user turn (caches conversation history for multi-turn)
+//
+// Up to 4 cache breakpoints are allowed per request. Tools, System, and Messages are INDEPENDENT breakpoints.
+// This enables up to 90% cost reduction on cached tokens (cache read = 0.1x base price).
+// See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
+func ensureCacheControl(payload []byte) []byte {
+	// 1. Inject cache_control into the LAST tool (caches all tool definitions)
+	// Tools are cached first in the hierarchy, so this is the most important breakpoint.
+	payload = injectToolsCacheControl(payload)
+
+	// 2. Inject cache_control into the LAST system prompt element
+	// System is the second level in the cache hierarchy.
+	payload = injectSystemCacheControl(payload)
+
+	// 3. Inject cache_control into messages for multi-turn conversation caching
+	// This caches the conversation history up to the second-to-last user turn.
+	payload = injectMessagesCacheControl(payload)
+
+	return payload
+}
+
+// injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching.
+// Per Anthropic docs: "Place cache_control on the second-to-last User message to let the model reuse the earlier cache."
+// This enables caching of conversation history, which is especially beneficial for long multi-turn conversations.
+// Only adds cache_control if:
+// - There are at least 2 user turns in the conversation
+// - No message content already has cache_control
+func injectMessagesCacheControl(payload []byte) []byte {
+	messages := gjson.GetBytes(payload, "messages")
+	if !messages.Exists() || !messages.IsArray() {
+		return payload
+	}
+
+	// Check if ANY message content already has cache_control
+	hasCacheControlInMessages := false
+	messages.ForEach(func(_, msg gjson.Result) bool {
+		content := msg.Get("content")
+		if content.IsArray() {
+			content.ForEach(func(_, item gjson.Result) bool {
+				if item.Get("cache_control").Exists() {
+					hasCacheControlInMessages = true
+					return false
+				}
+				return true
+			})
+		}
+		return !hasCacheControlInMessages
+	})
+	if hasCacheControlInMessages {
+		return payload
+	}
+
+	// Find all user message indices
+	var userMsgIndices []int
+	messages.ForEach(func(index gjson.Result, msg gjson.Result) bool {
+		if msg.Get("role").String() == "user" {
+			userMsgIndices = append(userMsgIndices, int(index.Int()))
+		}
+		return true
+	})
+
+	// Need at least 2 user turns to cache the second-to-last
+	if len(userMsgIndices) < 2 {
+		return payload
+	}
+
+	// Get the second-to-last user message index
+	secondToLastUserIdx := userMsgIndices[len(userMsgIndices)-2]
+
+	// Get the content of this message
+	contentPath := fmt.Sprintf("messages.%d.content", secondToLastUserIdx)
+	content := gjson.GetBytes(payload, contentPath)
+
+	if content.IsArray() {
+		// Add cache_control to the last content block of this message
+		contentCount := int(content.Get("#").Int())
+		if contentCount > 0 {
+			cacheControlPath := fmt.Sprintf("messages.%d.content.%d.cache_control", secondToLastUserIdx, contentCount-1)
+			result, err := sjson.SetBytes(payload, cacheControlPath, map[string]string{"type": "ephemeral"})
+			if err != nil {
+				log.Warnf("failed to inject cache_control into messages: %v", err)
+				return payload
+			}
+			payload = result
+		}
+	} else if content.Type == gjson.String {
+		// Convert string content to array with cache_control
+		text := content.String()
+		newContent := []map[string]interface{}{
+			{
+				"type": "text",
+				"text": text,
+				"cache_control": map[string]string{
+					"type": "ephemeral",
+				},
+			},
+		}
+		result, err := sjson.SetBytes(payload, contentPath, newContent)
+		if err != nil {
+			log.Warnf("failed to inject cache_control into message string content: %v", err)
+			return payload
+		}
+		payload = result
+	}
+
+	return payload
+}
+
+// injectToolsCacheControl adds cache_control to the last tool in the tools array.
+// Per Anthropic docs: "The cache_control parameter on the last tool definition caches all tool definitions."
+// This only adds cache_control if NO tool in the array already has it.
+func injectToolsCacheControl(payload []byte) []byte {
+	tools := gjson.GetBytes(payload, "tools")
+	if !tools.Exists() || !tools.IsArray() {
+		return payload
+	}
+
+	toolCount := int(tools.Get("#").Int())
+	if toolCount == 0 {
+		return payload
+	}
+
+	// Check if ANY tool already has cache_control - if so, don't modify tools
+	hasCacheControlInTools := false
+	tools.ForEach(func(_, tool gjson.Result) bool {
+		if tool.Get("cache_control").Exists() {
+			hasCacheControlInTools = true
+			return false
+		}
+		return true
+	})
+	if hasCacheControlInTools {
+		return payload
+	}
+
+	// Add cache_control to the last tool
+	lastToolPath := fmt.Sprintf("tools.%d.cache_control", toolCount-1)
+	result, err := sjson.SetBytes(payload, lastToolPath, map[string]string{"type": "ephemeral"})
+	if err != nil {
+		log.Warnf("failed to inject cache_control into tools array: %v", err)
+		return payload
+	}
+
+	return result
+}
+
+// injectSystemCacheControl adds cache_control to the last element in the system prompt.
+// Converts string system prompts to array format if needed.
+// This only adds cache_control if NO system element already has it.
+func injectSystemCacheControl(payload []byte) []byte {
+	system := gjson.GetBytes(payload, "system")
+	if !system.Exists() {
+		return payload
+	}
+
+	if system.IsArray() {
+		count := int(system.Get("#").Int())
+		if count == 0 {
+			return payload
+		}
+
+		// Check if ANY system element already has cache_control
+		hasCacheControlInSystem := false
+		system.ForEach(func(_, item gjson.Result) bool {
+			if item.Get("cache_control").Exists() {
+				hasCacheControlInSystem = true
+				return false
+			}
+			return true
+		})
+		if hasCacheControlInSystem {
+			return payload
+		}
+
+		// Add cache_control to the last system element
+		lastSystemPath := fmt.Sprintf("system.%d.cache_control", count-1)
+		result, err := sjson.SetBytes(payload, lastSystemPath, map[string]string{"type": "ephemeral"})
+		if err != nil {
+			log.Warnf("failed to inject cache_control into system array: %v", err)
+			return payload
+		}
+		payload = result
+	} else if system.Type == gjson.String {
+		// Convert string system prompt to array with cache_control
+		// "system": "text" -> "system": [{"type": "text", "text": "text", "cache_control": {"type": "ephemeral"}}]
+		text := system.String()
+		newSystem := []map[string]interface{}{
+			{
+				"type": "text",
+				"text": text,
+				"cache_control": map[string]string{
+					"type": "ephemeral",
+				},
+			},
+		}
+		result, err := sjson.SetBytes(payload, "system", newSystem)
+		if err != nil {
+			log.Warnf("failed to inject cache_control into system string: %v", err)
+			return payload
+		}
+		payload = result
+	}
+
+	return payload
+}
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -17,6 +17,7 @@ import (
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	log "github.com/sirupsen/logrus"
+	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )

@@ -134,6 +135,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	}
 	e.applyHeaders(httpReq, apiToken)

+	// Add Copilot-Vision-Request header if the request contains vision content
+	if detectVisionContent(body) {
+		httpReq.Header.Set("Copilot-Vision-Request", "true")
+	}
+
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -238,6 +244,11 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	}
 	e.applyHeaders(httpReq, apiToken)

+	// Add Copilot-Vision-Request header if the request contains vision content
+	if detectVisionContent(body) {
+		httpReq.Header.Set("Copilot-Vision-Request", "true")
+	}
+
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -415,6 +426,34 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string) {
 	r.Header.Set("X-Request-Id", uuid.NewString())
 }

+// detectVisionContent checks if the request body contains vision/image content.
+// Returns true if the request includes image_url or image type content blocks.
+func detectVisionContent(body []byte) bool {
+	// Parse messages array
+	messagesResult := gjson.GetBytes(body, "messages")
+	if !messagesResult.Exists() || !messagesResult.IsArray() {
+		return false
+	}
+
+	// Check each message for vision content
+	for _, message := range messagesResult.Array() {
+		content := message.Get("content")
+
+		// If content is an array, check each content block
+		if content.IsArray() {
+			for _, block := range content.Array() {
+				blockType := block.Get("type").String()
+				// Check for image_url or image type
+				if blockType == "image_url" || blockType == "image" {
+					return true
+				}
+			}
+		}
+	}
+
+	return false
+}
+
 // normalizeModel is a no-op as GitHub Copilot accepts model names directly.
 // Model mapping should be done at the registry level if needed.
 func (e *GitHubCopilotExecutor) normalizeModel(_ string, body []byte) []byte {
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -35,7 +35,7 @@ import (

 const (
 	// Kiro API common constants
-	kiroContentType  = "application/x-amz-json-1.0"
+	kiroContentType  = "application/json"
 	kiroAcceptStream = "*/*"

 	// Event Stream frame size constants for boundary protection
@@ -47,17 +47,18 @@ const (
 	// Event Stream error type constants
 	ErrStreamFatal     = "fatal"     // Connection/authentication errors, not recoverable
 	ErrStreamMalformed = "malformed" // Format errors, data cannot be parsed
-	// kiroUserAgent matches amq2api format for User-Agent header (Amazon Q CLI style)
+
+	// kiroUserAgent matches Amazon Q CLI style for User-Agent header
 	kiroUserAgent = "aws-sdk-rust/1.3.9 os/macos lang/rust/1.87.0"
-	// kiroFullUserAgent is the complete x-amz-user-agent header matching amq2api (Amazon Q CLI style)
+	// kiroFullUserAgent is the complete x-amz-user-agent header (Amazon Q CLI style)
 	kiroFullUserAgent = "aws-sdk-rust/1.3.9 ua/2.1 api/ssooidc/1.88.0 os/macos lang/rust/1.87.0 m/E app/AmazonQ-For-CLI"

-	// Kiro IDE style headers (from kiro2api - for IDC auth)
-	kiroIDEUserAgent     = "aws-sdk-js/1.0.18 ua/2.1 os/darwin#25.0.0 lang/js md/nodejs#20.16.0 api/codewhispererstreaming#1.0.18 m/E KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
-	kiroIDEAmzUserAgent  = "aws-sdk-js/1.0.18 KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
-	kiroIDEAgentModeSpec = "spec"
+	// Kiro IDE style headers for IDC auth
+	kiroIDEUserAgent     = "aws-sdk-js/1.0.27 ua/2.1 os/win32#10.0.19044 lang/js md/nodejs#22.21.1 api/codewhispererstreaming#1.0.27 m/E"
+	kiroIDEAmzUserAgent  = "aws-sdk-js/1.0.27"
+	kiroIDEAgentModeVibe = "vibe"

-	// Socket retry configuration constants (based on kiro2Api reference implementation)
+	// Socket retry configuration constants
 	// Maximum number of retry attempts for socket/network errors
 	kiroSocketMaxRetries = 3
 	// Base delay between retry attempts (uses exponential backoff: delay * 2^attempt)
@@ -104,13 +105,13 @@ func getGlobalFingerprintManager() *kiroauth.FingerprintManager {
 // retryConfig holds configuration for socket retry logic.
 // Based on kiro2Api Python implementation patterns.
 type retryConfig struct {
-	MaxRetries       int           // Maximum number of retry attempts
-	BaseDelay        time.Duration // Base delay between retries (exponential backoff)
-	MaxDelay         time.Duration // Maximum delay cap
-	RetryableErrors  []string      // List of retryable error patterns
-	RetryableStatus  map[int]bool  // HTTP status codes to retry
-	FirstTokenTmout  time.Duration // Timeout for first token in streaming
-	StreamReadTmout  time.Duration // Timeout between stream chunks
+	MaxRetries      int           // Maximum number of retry attempts
+	BaseDelay       time.Duration // Base delay between retries (exponential backoff)
+	MaxDelay        time.Duration // Maximum delay cap
+	RetryableErrors []string      // List of retryable error patterns
+	RetryableStatus map[int]bool  // HTTP status codes to retry
+	FirstTokenTmout time.Duration // Timeout for first token in streaming
+	StreamReadTmout time.Duration // Timeout between stream chunks
 }

 // defaultRetryConfig returns the default retry configuration for Kiro socket operations.
@@ -334,52 +335,111 @@ type kiroEndpointConfig struct {
 	Name      string // Endpoint name for logging
 }

-// kiroEndpointConfigs defines the available Kiro API endpoints with their compatible configurations.
-// The order determines fallback priority: primary endpoint first, then fallbacks.
-//
-// CRITICAL: Each endpoint MUST use its compatible Origin and AmzTarget values:
-// - CodeWhisperer endpoint (codewhisperer.us-east-1.amazonaws.com): Uses AI_EDITOR origin and AmazonCodeWhispererStreamingService target
-// - Amazon Q endpoint (q.us-east-1.amazonaws.com): Uses CLI origin and AmazonQDeveloperStreamingService target
-//
-// Mismatched combinations will result in 403 Forbidden errors.
-//
-// NOTE: CodeWhisperer is set as the default endpoint because:
-// 1. Most tokens come from Kiro IDE / VSCode extensions (AWS Builder ID auth)
-// 2. These tokens use AI_EDITOR origin which is only compatible with CodeWhisperer endpoint
-// 3. Amazon Q endpoint requires CLI origin which is for Amazon Q CLI tokens
-// This matches the AIClient-2-API-main project's configuration.
-var kiroEndpointConfigs = []kiroEndpointConfig{
-	{
-		URL:       "https://codewhisperer.us-east-1.amazonaws.com/generateAssistantResponse",
-		Origin:    "AI_EDITOR",
-		AmzTarget: "AmazonCodeWhispererStreamingService.GenerateAssistantResponse",
-		Name:      "CodeWhisperer",
-	},
-	{
-		URL:       "https://q.us-east-1.amazonaws.com/",
-		Origin:    "CLI",
-		AmzTarget: "AmazonQDeveloperStreamingService.SendMessage",
-		Name:      "AmazonQ",
-	},
+// kiroDefaultRegion is the default AWS region for Kiro API endpoints.
+// Used when no region is specified in auth metadata.
+const kiroDefaultRegion = "us-east-1"
+
+// extractRegionFromProfileARN extracts the AWS region from a ProfileARN.
+// ARN format: arn:aws:codewhisperer:REGION:ACCOUNT:profile/PROFILE_ID
+// Returns empty string if region cannot be extracted.
+func extractRegionFromProfileARN(profileArn string) string {
+	if profileArn == "" {
+		return ""
+	}
+	parts := strings.Split(profileArn, ":")
+	if len(parts) >= 4 && parts[3] != "" {
+		return parts[3]
+	}
+	return ""
 }

+// buildKiroEndpointConfigs creates endpoint configurations for the specified region.
+// This enables dynamic region support for Enterprise/IdC users in non-us-east-1 regions.
+//
+// Uses Q endpoint (q.{region}.amazonaws.com) as primary for ALL auth types:
+// - Works universally across all AWS regions (CodeWhisperer endpoint only exists in us-east-1)
+// - Uses /generateAssistantResponse path with AI_EDITOR origin
+// - Does NOT require X-Amz-Target header
+//
+// The AmzTarget field is kept for backward compatibility but should be empty
+// to indicate that the header should NOT be set.
+func buildKiroEndpointConfigs(region string) []kiroEndpointConfig {
+	if region == "" {
+		region = kiroDefaultRegion
+	}
+	return []kiroEndpointConfig{
+		{
+			// Primary: Q endpoint - works for all regions and auth types
+			URL:       fmt.Sprintf("https://q.%s.amazonaws.com/generateAssistantResponse", region),
+			Origin:    "AI_EDITOR",
+			AmzTarget: "", // Empty = don't set X-Amz-Target header
+			Name:      "AmazonQ",
+		},
+		{
+			// Fallback: CodeWhisperer endpoint (legacy, only works in us-east-1)
+			URL:       fmt.Sprintf("https://codewhisperer.%s.amazonaws.com/generateAssistantResponse", region),
+			Origin:    "AI_EDITOR",
+			AmzTarget: "AmazonCodeWhispererStreamingService.GenerateAssistantResponse",
+			Name:      "CodeWhisperer",
+		},
+	}
+}
+
+// kiroEndpointConfigs is kept for backward compatibility with default us-east-1 region.
+// Prefer using buildKiroEndpointConfigs(region) for dynamic region support.
+var kiroEndpointConfigs = buildKiroEndpointConfigs(kiroDefaultRegion)
+
 // getKiroEndpointConfigs returns the list of Kiro API endpoint configurations to try in order.
+// Supports dynamic region based on auth metadata "api_region", "profile_arn", or "region" field.
 // Supports reordering based on "preferred_endpoint" in auth metadata/attributes.
-// For IDC auth method, automatically uses CodeWhisperer endpoint with CLI origin.
+//
+// Region priority:
+// 1. auth.Metadata["api_region"] - explicit API region override
+// 2. ProfileARN region - extracted from arn:aws:service:REGION:account:resource
+// 3. kiroDefaultRegion (us-east-1) - fallback
+// Note: OIDC "region" is NOT used - it's for token refresh, not API calls
 func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig {
 	if auth == nil {
 		return kiroEndpointConfigs
 	}

-	// For IDC auth, use CodeWhisperer endpoint with AI_EDITOR origin (same as Social auth)
-	// Based on kiro2api analysis: IDC tokens work with CodeWhisperer endpoint using Bearer auth
+	// Determine API region with priority: api_region > profile_arn > region > default
+	region := kiroDefaultRegion
+	regionSource := "default"
+
+	if auth.Metadata != nil {
+		// Priority 1: Explicit api_region override
+		if r, ok := auth.Metadata["api_region"].(string); ok && r != "" {
+			region = r
+			regionSource = "api_region"
+		} else {
+			// Priority 2: Extract from ProfileARN
+			if profileArn, ok := auth.Metadata["profile_arn"].(string); ok && profileArn != "" {
+				if arnRegion := extractRegionFromProfileARN(profileArn); arnRegion != "" {
+					region = arnRegion
+					regionSource = "profile_arn"
+				}
+			}
+			// Note: OIDC "region" field is NOT used for API endpoint
+			// Kiro API only exists in us-east-1, while OIDC region can vary (e.g., ap-northeast-2)
+			// Using OIDC region for API calls causes DNS failures
+		}
+	}
+
+	log.Debugf("kiro: using region %s (source: %s)", region, regionSource)
+
+	// Build endpoint configs for the specified region
+	endpointConfigs := buildKiroEndpointConfigs(region)
+
+	// For IDC auth, use Q endpoint with AI_EDITOR origin
+	// IDC tokens work with Q endpoint using Bearer auth
 	// The difference is only in how tokens are refreshed (OIDC with clientId/clientSecret for IDC)
 	// NOT in how API calls are made - both Social and IDC use the same endpoint/origin
 	if auth.Metadata != nil {
 		authMethod, _ := auth.Metadata["auth_method"].(string)
-		if authMethod == "idc" {
-			log.Debugf("kiro: IDC auth, using CodeWhisperer endpoint")
-			return kiroEndpointConfigs
+		if strings.ToLower(authMethod) == "idc" {
+			log.Debugf("kiro: IDC auth, using Q endpoint (region: %s)", region)
+			return endpointConfigs
 		}
 	}

@@ -396,7 +456,7 @@ func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig {
 	}

 	if preference == "" {
-		return kiroEndpointConfigs
+		return endpointConfigs
 	}

 	preference = strings.ToLower(strings.TrimSpace(preference))
@@ -405,7 +465,7 @@ func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig {
 	var sorted []kiroEndpointConfig
 	var remaining []kiroEndpointConfig

-	for _, cfg := range kiroEndpointConfigs {
+	for _, cfg := range endpointConfigs {
 		name := strings.ToLower(cfg.Name)
 		// Check for matches
 		// CodeWhisperer aliases: codewhisperer, ide
@@ -426,7 +486,7 @@ func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig {

 	// If preference didn't match anything, return default
 	if len(sorted) == 0 {
-		return kiroEndpointConfigs
+		return endpointConfigs
 	}

 	// Combine: preferred first, then others
@@ -445,7 +505,7 @@ func isIDCAuth(auth *cliproxyauth.Auth) bool {
 		return false
 	}
 	authMethod, _ := auth.Metadata["auth_method"].(string)
-	return authMethod == "idc"
+	return strings.ToLower(authMethod) == "idc"
 }

 // buildKiroPayloadForFormat builds the Kiro API payload based on the source format.
@@ -482,12 +542,12 @@ func applyDynamicFingerprint(req *http.Request, auth *cliproxyauth.Auth) {
 		// Get token-specific fingerprint for dynamic UA generation
 		tokenKey := getTokenKey(auth)
 		fp := getGlobalFingerprintManager().GetFingerprint(tokenKey)
-		
+
 		// Use fingerprint-generated dynamic User-Agent
 		req.Header.Set("User-Agent", fp.BuildUserAgent())
 		req.Header.Set("X-Amz-User-Agent", fp.BuildAmzUserAgent())
-		req.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
-		
+		req.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeVibe)
+
 		log.Debugf("kiro: using dynamic fingerprint for token %s (SDK:%s, OS:%s/%s, Kiro:%s)",
 			tokenKey[:8]+"...", fp.SDKVersion, fp.OSType, fp.OSVersion, fp.KiroVersion)
 	} else {
@@ -506,10 +566,10 @@ func (e *KiroExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth
 	if strings.TrimSpace(accessToken) == "" {
 		return statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
 	}
-	
+
 	// Apply dynamic fingerprint-based headers
 	applyDynamicFingerprint(req, auth)
-	
+
 	req.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
 	req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
 	req.Header.Set("Authorization", "Bearer "+accessToken)
@@ -665,12 +725,17 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.

 			httpReq.Header.Set("Content-Type", kiroContentType)
 			httpReq.Header.Set("Accept", kiroAcceptStream)
-			// Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors)
-			httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
+			// Only set X-Amz-Target if specified (Q endpoint doesn't require it)
+			if endpointConfig.AmzTarget != "" {
+				httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
+			}
+			// Kiro-specific headers
+			httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeVibe)
+			httpReq.Header.Set("x-amzn-codewhisperer-optout", "true")

 			// Apply dynamic fingerprint-based headers
 			applyDynamicFingerprint(httpReq, auth)
-			
+
 			httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
 			httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())

@@ -910,30 +975,36 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
 			}

 			// Fallback for usage if missing from upstream
-			if usageInfo.TotalTokens == 0 {
+
+			// 1. Estimate InputTokens if missing
+			if usageInfo.InputTokens == 0 {
 				if enc, encErr := getTokenizer(req.Model); encErr == nil {
 					if inp, countErr := countOpenAIChatTokens(enc, opts.OriginalRequest); countErr == nil {
 						usageInfo.InputTokens = inp
 					}
 				}
-				if len(content) > 0 {
-					// Use tiktoken for more accurate output token calculation
-					if enc, encErr := getTokenizer(req.Model); encErr == nil {
-						if tokenCount, countErr := enc.Count(content); countErr == nil {
-							usageInfo.OutputTokens = int64(tokenCount)
-						}
-					}
-					// Fallback to character count estimation if tiktoken fails
-					if usageInfo.OutputTokens == 0 {
-						usageInfo.OutputTokens = int64(len(content) / 4)
-						if usageInfo.OutputTokens == 0 {
-							usageInfo.OutputTokens = 1
-						}
+			}
+
+			// 2. Estimate OutputTokens if missing and content is available
+			if usageInfo.OutputTokens == 0 && len(content) > 0 {
+				// Use tiktoken for more accurate output token calculation
+				if enc, encErr := getTokenizer(req.Model); encErr == nil {
+					if tokenCount, countErr := enc.Count(content); countErr == nil {
+						usageInfo.OutputTokens = int64(tokenCount)
+					}
+				}
+				// Fallback to character count estimation if tiktoken fails
+				if usageInfo.OutputTokens == 0 {
+					usageInfo.OutputTokens = int64(len(content) / 4)
+					if usageInfo.OutputTokens == 0 {
+						usageInfo.OutputTokens = 1
 					}
 				}
-				usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
 			}

+			// 3. Update TotalTokens
+			usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
+
 			appendAPIResponseChunk(ctx, e.cfg, []byte(content))
 			reporter.publish(ctx, usageInfo)

@@ -1074,12 +1145,17 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox

 			httpReq.Header.Set("Content-Type", kiroContentType)
 			httpReq.Header.Set("Accept", kiroAcceptStream)
-			// Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors)
-			httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
+			// Only set X-Amz-Target if specified (Q endpoint doesn't require it)
+			if endpointConfig.AmzTarget != "" {
+				httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
+			}
+			// Kiro-specific headers
+			httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeVibe)
+			httpReq.Header.Set("x-amzn-codewhisperer-optout", "true")

 			// Apply dynamic fingerprint-based headers
 			applyDynamicFingerprint(httpReq, auth)
-			
+
 			httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
 			httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())

@@ -1537,11 +1613,27 @@ func determineAgenticMode(model string) (isAgentic, isChatOnly bool) {
 }

 // getEffectiveProfileArn determines if profileArn should be included based on auth method.
-// profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO).
+// profileArn is only needed for social auth (Google OAuth), not for AWS SSO OIDC (Builder ID/IDC).
+//
+// Detection logic (matching kiro-openai-gateway):
+// 1. Check auth_method field: "builder-id" or "idc"
+// 2. Check auth_type field: "aws_sso_oidc" (from kiro-cli tokens)
+// 3. Check for client_id + client_secret presence (AWS SSO OIDC signature)
 func getEffectiveProfileArn(auth *cliproxyauth.Auth, profileArn string) string {
 	if auth != nil && auth.Metadata != nil {
-		if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" {
-			return "" // Don't include profileArn for builder-id auth
+		// Check 1: auth_method field (from CLIProxyAPI tokens)
+		if authMethod, ok := auth.Metadata["auth_method"].(string); ok && (authMethod == "builder-id" || authMethod == "idc") {
+			return "" // AWS SSO OIDC - don't include profileArn
+		}
+		// Check 2: auth_type field (from kiro-cli tokens)
+		if authType, ok := auth.Metadata["auth_type"].(string); ok && authType == "aws_sso_oidc" {
+			return "" // AWS SSO OIDC - don't include profileArn
+		}
+		// Check 3: client_id + client_secret presence (AWS SSO OIDC signature)
+		_, hasClientID := auth.Metadata["client_id"].(string)
+		_, hasClientSecret := auth.Metadata["client_secret"].(string)
+		if hasClientID && hasClientSecret {
+			return "" // AWS SSO OIDC - don't include profileArn
 		}
 	}
 	return profileArn
@@ -1550,14 +1642,32 @@ func getEffectiveProfileArn(auth *cliproxyauth.Auth, profileArn string) string {
 // getEffectiveProfileArnWithWarning determines if profileArn should be included based on auth method,
 // and logs a warning if profileArn is missing for non-builder-id auth.
 // This consolidates the auth_method check that was previously done separately.
+//
+// AWS SSO OIDC (Builder ID/IDC) users don't need profileArn - sending it causes 403 errors.
+// Only Kiro Desktop (social auth like Google/GitHub) users need profileArn.
+//
+// Detection logic (matching kiro-openai-gateway):
+// 1. Check auth_method field: "builder-id" or "idc"
+// 2. Check auth_type field: "aws_sso_oidc" (from kiro-cli tokens)
+// 3. Check for client_id + client_secret presence (AWS SSO OIDC signature)
 func getEffectiveProfileArnWithWarning(auth *cliproxyauth.Auth, profileArn string) string {
 	if auth != nil && auth.Metadata != nil {
+		// Check 1: auth_method field (from CLIProxyAPI tokens)
 		if authMethod, ok := auth.Metadata["auth_method"].(string); ok && (authMethod == "builder-id" || authMethod == "idc") {
-			// builder-id and idc auth don't need profileArn
-			return ""
+			return "" // AWS SSO OIDC - don't include profileArn
+		}
+		// Check 2: auth_type field (from kiro-cli tokens)
+		if authType, ok := auth.Metadata["auth_type"].(string); ok && authType == "aws_sso_oidc" {
+			return "" // AWS SSO OIDC - don't include profileArn
+		}
+		// Check 3: client_id + client_secret presence (AWS SSO OIDC signature, like kiro-openai-gateway)
+		_, hasClientID := auth.Metadata["client_id"].(string)
+		_, hasClientSecret := auth.Metadata["client_secret"].(string)
+		if hasClientID && hasClientSecret {
+			return "" // AWS SSO OIDC - don't include profileArn
 		}
 	}
-	// For non-builder-id/idc auth (social auth), profileArn is required
+	// For social auth (Kiro Desktop), profileArn is required
 	if profileArn == "" {
 		log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
 	}
@@ -2332,8 +2442,8 @@ func (e *KiroExecutor) extractEventTypeFromBytes(headers []byte) string {
 func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter, thinkingEnabled bool) {
 	reader := bufio.NewReaderSize(body, 20*1024*1024) // 20MB buffer to match other providers
 	var totalUsage usage.Detail
-	var hasToolUses bool          // Track if any tool uses were emitted
-	var upstreamStopReason string // Track stop_reason from upstream events
+	var hasToolUses bool              // Track if any tool uses were emitted
+	var upstreamStopReason string     // Track stop_reason from upstream events

 	// Tool use state tracking for input buffering and deduplication
 	processedIDs := make(map[string]bool)
@@ -3111,12 +3221,92 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 			_ = signature // Signature can be used for verification if needed

 		case "toolUseEvent":
+			// Debug: log raw toolUseEvent payload for large tool inputs
+			if log.IsLevelEnabled(log.DebugLevel) {
+				payloadStr := string(payload)
+				if len(payloadStr) > 500 {
+					payloadStr = payloadStr[:500] + "...[truncated]"
+				}
+				log.Debugf("kiro: raw toolUseEvent payload (%d bytes): %s", len(payload), payloadStr)
+			}
 			// Handle dedicated tool use events with input buffering
 			completedToolUses, newState := kiroclaude.ProcessToolUseEvent(event, currentToolUse, processedIDs)
 			currentToolUse = newState

 			// Emit completed tool uses
 			for _, tu := range completedToolUses {
+				// Check for truncated write marker - emit as a Bash tool that echoes the error
+				// This way Claude Code will execute it, see the error, and the agent can retry
+				if tu.Name == "__truncated_write__" {
+					filePath := ""
+					if fp, ok := tu.Input["file_path"].(string); ok && fp != "" {
+						filePath = fp
+					}
+
+					// Create a Bash tool that echoes the error message
+					// This will be executed by Claude Code and the agent will see the result
+					var errorMsg string
+					if filePath != "" {
+						errorMsg = fmt.Sprintf("echo '[WRITE TOOL ERROR] The file content for \"%s\" is too large to be transmitted by the upstream API. You MUST retry by writing the file in smaller chunks: First use Write to create the file with the first 700 lines, then use multiple Edit operations to append the remaining content in chunks of ~700 lines each.'", filePath)
+					} else {
+						errorMsg = "echo '[WRITE TOOL ERROR] The file content is too large to be transmitted by the upstream API. The Write tool input was truncated. You MUST retry by writing the file in smaller chunks: First use Write to create the file with the first 700 lines, then use multiple Edit operations to append the remaining content in chunks of ~700 lines each.'"
+					}
+
+					log.Warnf("kiro: converting truncated write to Bash echo for file: %s", filePath)
+
+					hasToolUses = true
+
+					// Close text block if open
+					if isTextBlockOpen && contentBlockIndex >= 0 {
+						blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex)
+						sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
+						for _, chunk := range sseData {
+							if chunk != "" {
+								out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+							}
+						}
+						isTextBlockOpen = false
+					}
+
+					contentBlockIndex++
+
+					// Emit as Bash tool_use
+					blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", tu.ToolUseID, "Bash")
+					sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+					for _, chunk := range sseData {
+						if chunk != "" {
+							out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+						}
+					}
+
+					// Emit the Bash command as input
+					bashInput := map[string]interface{}{
+						"command": errorMsg,
+					}
+					inputJSON, err := json.Marshal(bashInput)
+					if err != nil {
+						log.Errorf("kiro: failed to marshal bash input for truncated write error: %v", err)
+						continue
+					}
+					inputDelta := kiroclaude.BuildClaudeInputJsonDeltaEvent(string(inputJSON), contentBlockIndex)
+					sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, inputDelta, &translatorParam)
+					for _, chunk := range sseData {
+						if chunk != "" {
+							out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+						}
+					}
+
+					blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex)
+					sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
+					for _, chunk := range sseData {
+						if chunk != "" {
+							out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+						}
+					}
+
+					continue // Skip the normal tool_use emission
+				}
+
 				hasToolUses = true

 				// Close text block if open
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -155,10 +155,13 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 						clientContentJSON, _ = sjson.SetRaw(clientContentJSON, "parts.-1", partJSON)
 					} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "text" {
 						prompt := contentResult.Get("text").String()
-						partJSON := `{}`
-						if prompt != "" {
-							partJSON, _ = sjson.Set(partJSON, "text", prompt)
+						// Skip empty text parts to avoid Gemini API error:
+						// "required oneof field 'data' must have one initialized field"
+						if prompt == "" {
+							continue
 						}
+						partJSON := `{}`
+						partJSON, _ = sjson.Set(partJSON, "text", prompt)
 						clientContentJSON, _ = sjson.SetRaw(clientContentJSON, "parts.-1", partJSON)
 					} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "tool_use" {
 						// NOTE: Do NOT inject dummy thinking blocks here.
@@ -285,6 +288,13 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 					}
 				}

+				// Skip messages with empty parts array to avoid Gemini API error:
+				// "required oneof field 'data' must have one initialized field"
+				partsCheck := gjson.Get(clientContentJSON, "parts")
+				if !partsCheck.IsArray() || len(partsCheck.Array()) == 0 {
+					continue
+				}
+
 				contentsJSON, _ = sjson.SetRaw(contentsJSON, "-1", clientContentJSON)
 				hasContents = true
 			} else if contentsResult.Type == gjson.String {
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -305,12 +305,14 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		}
 	}

-	// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
+	// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch/codeExecution/urlContext passthrough
 	tools := gjson.GetBytes(rawJSON, "tools")
 	if tools.IsArray() && len(tools.Array()) > 0 {
 		functionToolNode := []byte(`{}`)
 		hasFunction := false
 		googleSearchNodes := make([][]byte, 0)
+		codeExecutionNodes := make([][]byte, 0)
+		urlContextNodes := make([][]byte, 0)
 		for _, t := range tools.Array() {
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
@@ -370,8 +372,28 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 				}
 				googleSearchNodes = append(googleSearchNodes, googleToolNode)
 			}
+			if ce := t.Get("code_execution"); ce.Exists() {
+				codeToolNode := []byte(`{}`)
+				var errSet error
+				codeToolNode, errSet = sjson.SetRawBytes(codeToolNode, "codeExecution", []byte(ce.Raw))
+				if errSet != nil {
+					log.Warnf("Failed to set codeExecution tool: %v", errSet)
+					continue
+				}
+				codeExecutionNodes = append(codeExecutionNodes, codeToolNode)
+			}
+			if uc := t.Get("url_context"); uc.Exists() {
+				urlToolNode := []byte(`{}`)
+				var errSet error
+				urlToolNode, errSet = sjson.SetRawBytes(urlToolNode, "urlContext", []byte(uc.Raw))
+				if errSet != nil {
+					log.Warnf("Failed to set urlContext tool: %v", errSet)
+					continue
+				}
+				urlContextNodes = append(urlContextNodes, urlToolNode)
+			}
 		}
-		if hasFunction || len(googleSearchNodes) > 0 {
+		if hasFunction || len(googleSearchNodes) > 0 || len(codeExecutionNodes) > 0 || len(urlContextNodes) > 0 {
 			toolsNode := []byte("[]")
 			if hasFunction {
 				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
@@ -379,6 +401,12 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 			for _, googleNode := range googleSearchNodes {
 				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
 			}
+			for _, codeNode := range codeExecutionNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", codeNode)
+			}
+			for _, urlNode := range urlContextNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", urlNode)
+			}
 			out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
 		}
 	}
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -283,12 +283,14 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		}
 	}

-	// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch passthrough
+	// tools -> request.tools[].functionDeclarations + request.tools[].googleSearch/codeExecution/urlContext passthrough
 	tools := gjson.GetBytes(rawJSON, "tools")
 	if tools.IsArray() && len(tools.Array()) > 0 {
 		functionToolNode := []byte(`{}`)
 		hasFunction := false
 		googleSearchNodes := make([][]byte, 0)
+		codeExecutionNodes := make([][]byte, 0)
+		urlContextNodes := make([][]byte, 0)
 		for _, t := range tools.Array() {
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
@@ -348,8 +350,28 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 				}
 				googleSearchNodes = append(googleSearchNodes, googleToolNode)
 			}
+			if ce := t.Get("code_execution"); ce.Exists() {
+				codeToolNode := []byte(`{}`)
+				var errSet error
+				codeToolNode, errSet = sjson.SetRawBytes(codeToolNode, "codeExecution", []byte(ce.Raw))
+				if errSet != nil {
+					log.Warnf("Failed to set codeExecution tool: %v", errSet)
+					continue
+				}
+				codeExecutionNodes = append(codeExecutionNodes, codeToolNode)
+			}
+			if uc := t.Get("url_context"); uc.Exists() {
+				urlToolNode := []byte(`{}`)
+				var errSet error
+				urlToolNode, errSet = sjson.SetRawBytes(urlToolNode, "urlContext", []byte(uc.Raw))
+				if errSet != nil {
+					log.Warnf("Failed to set urlContext tool: %v", errSet)
+					continue
+				}
+				urlContextNodes = append(urlContextNodes, urlToolNode)
+			}
 		}
-		if hasFunction || len(googleSearchNodes) > 0 {
+		if hasFunction || len(googleSearchNodes) > 0 || len(codeExecutionNodes) > 0 || len(urlContextNodes) > 0 {
 			toolsNode := []byte("[]")
 			if hasFunction {
 				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
@@ -357,6 +379,12 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 			for _, googleNode := range googleSearchNodes {
 				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
 			}
+			for _, codeNode := range codeExecutionNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", codeNode)
+			}
+			for _, urlNode := range urlContextNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", urlNode)
+			}
 			out, _ = sjson.SetRawBytes(out, "request.tools", toolsNode)
 		}
 	}
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -289,12 +289,14 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		}
 	}

-	// tools -> tools[].functionDeclarations + tools[].googleSearch passthrough
+	// tools -> tools[].functionDeclarations + tools[].googleSearch/codeExecution/urlContext passthrough
 	tools := gjson.GetBytes(rawJSON, "tools")
 	if tools.IsArray() && len(tools.Array()) > 0 {
 		functionToolNode := []byte(`{}`)
 		hasFunction := false
 		googleSearchNodes := make([][]byte, 0)
+		codeExecutionNodes := make([][]byte, 0)
+		urlContextNodes := make([][]byte, 0)
 		for _, t := range tools.Array() {
 			if t.Get("type").String() == "function" {
 				fn := t.Get("function")
@@ -354,8 +356,28 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 				}
 				googleSearchNodes = append(googleSearchNodes, googleToolNode)
 			}
+			if ce := t.Get("code_execution"); ce.Exists() {
+				codeToolNode := []byte(`{}`)
+				var errSet error
+				codeToolNode, errSet = sjson.SetRawBytes(codeToolNode, "codeExecution", []byte(ce.Raw))
+				if errSet != nil {
+					log.Warnf("Failed to set codeExecution tool: %v", errSet)
+					continue
+				}
+				codeExecutionNodes = append(codeExecutionNodes, codeToolNode)
+			}
+			if uc := t.Get("url_context"); uc.Exists() {
+				urlToolNode := []byte(`{}`)
+				var errSet error
+				urlToolNode, errSet = sjson.SetRawBytes(urlToolNode, "urlContext", []byte(uc.Raw))
+				if errSet != nil {
+					log.Warnf("Failed to set urlContext tool: %v", errSet)
+					continue
+				}
+				urlContextNodes = append(urlContextNodes, urlToolNode)
+			}
 		}
-		if hasFunction || len(googleSearchNodes) > 0 {
+		if hasFunction || len(googleSearchNodes) > 0 || len(codeExecutionNodes) > 0 || len(urlContextNodes) > 0 {
 			toolsNode := []byte("[]")
 			if hasFunction {
 				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", functionToolNode)
@@ -363,6 +385,12 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 			for _, googleNode := range googleSearchNodes {
 				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", googleNode)
 			}
+			for _, codeNode := range codeExecutionNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", codeNode)
+			}
+			for _, urlNode := range urlContextNodes {
+				toolsNode, _ = sjson.SetRawBytes(toolsNode, "-1", urlNode)
+			}
 			out, _ = sjson.SetRawBytes(out, "tools", toolsNode)
 		}
 	}
--- a/internal/translator/kiro/claude/kiro_claude_tools.go
+++ b/internal/translator/kiro/claude/kiro_claude_tools.go
@@ -395,6 +395,17 @@ func ProcessToolUseEvent(event map[string]interface{}, currentToolUse *ToolUseSt
 		isStop = stop
 	}

+	// Debug: log when stop event arrives
+	if isStop {
+		log.Debugf("kiro: toolUseEvent stop=true received for tool %s (ID: %s), currentToolUse buffer len: %d",
+			toolName, toolUseID, func() int {
+				if currentToolUse != nil {
+					return currentToolUse.InputBuffer.Len()
+				}
+				return -1
+			}())
+	}
+
 	// Get input - can be string (fragment) or object (complete)
 	var inputFragment string
 	var inputMap map[string]interface{}
@@ -466,12 +477,92 @@ func ProcessToolUseEvent(event map[string]interface{}, currentToolUse *ToolUseSt
 	if isStop && currentToolUse != nil {
 		fullInput := currentToolUse.InputBuffer.String()

+		// Check for Write tool with empty or missing input - this happens when Kiro API
+		// completely skips sending input for large file writes
+		if currentToolUse.Name == "Write" && len(strings.TrimSpace(fullInput)) == 0 {
+			log.Warnf("kiro: Write tool received no input from upstream API. The file content may be too large to transmit.")
+			// Return nil to skip this tool use - it will be handled as a truncation error
+			// The caller should emit a text block explaining the error instead
+			if processedIDs != nil {
+				processedIDs[currentToolUse.ToolUseID] = true
+			}
+			log.Infof("kiro: skipping Write tool use %s due to empty input (content too large)", currentToolUse.ToolUseID)
+			// Return a special marker tool use that indicates truncation
+			toolUse := KiroToolUse{
+				ToolUseID: currentToolUse.ToolUseID,
+				Name:      "__truncated_write__", // Special marker name
+				Input: map[string]interface{}{
+					"error": "Write tool input was not transmitted by upstream API. The file content is too large.",
+				},
+			}
+			toolUses = append(toolUses, toolUse)
+			return toolUses, nil
+		}
+
 		// Repair and parse the accumulated JSON
 		repairedJSON := RepairJSON(fullInput)
 		var finalInput map[string]interface{}
 		if err := json.Unmarshal([]byte(repairedJSON), &finalInput); err != nil {
 			log.Warnf("kiro: failed to parse accumulated tool input: %v, raw: %s", err, fullInput)
 			finalInput = make(map[string]interface{})
+
+			// Check if this is a Write tool with truncated input (missing content field)
+			// This happens when the Kiro API truncates large tool inputs
+			if currentToolUse.Name == "Write" && strings.Contains(fullInput, "file_path") && !strings.Contains(fullInput, "content") {
+				log.Warnf("kiro: Write tool input was truncated by upstream API (content field missing). The file content may be too large.")
+				// Extract file_path if possible for error context
+				filePath := ""
+				if idx := strings.Index(fullInput, "file_path"); idx >= 0 {
+					// Try to extract the file path value
+					rest := fullInput[idx:]
+					if colonIdx := strings.Index(rest, ":"); colonIdx >= 0 {
+						rest = strings.TrimSpace(rest[colonIdx+1:])
+						if len(rest) > 0 && rest[0] == '"' {
+							rest = rest[1:]
+							if endQuote := strings.Index(rest, "\""); endQuote >= 0 {
+								filePath = rest[:endQuote]
+							}
+						}
+					}
+				}
+				if processedIDs != nil {
+					processedIDs[currentToolUse.ToolUseID] = true
+				}
+				// Return a special marker tool use that indicates truncation
+				toolUse := KiroToolUse{
+					ToolUseID: currentToolUse.ToolUseID,
+					Name:      "__truncated_write__", // Special marker name
+					Input: map[string]interface{}{
+						"error":     "Write tool content was truncated by upstream API. The file content is too large.",
+						"file_path": filePath,
+					},
+				}
+				toolUses = append(toolUses, toolUse)
+				return toolUses, nil
+			}
+		}
+
+		// Additional check: Write tool parsed successfully but missing content field
+		if currentToolUse.Name == "Write" {
+			if _, hasContent := finalInput["content"]; !hasContent {
+				if filePath, hasPath := finalInput["file_path"]; hasPath {
+					log.Warnf("kiro: Write tool input missing 'content' field, likely truncated by upstream API")
+					if processedIDs != nil {
+						processedIDs[currentToolUse.ToolUseID] = true
+					}
+					// Return a special marker tool use that indicates truncation
+					toolUse := KiroToolUse{
+						ToolUseID: currentToolUse.ToolUseID,
+						Name:      "__truncated_write__", // Special marker name
+						Input: map[string]interface{}{
+							"error":     "Write tool content field was missing. The file content is too large.",
+							"file_path": filePath,
+						},
+					}
+					toolUses = append(toolUses, toolUse)
+					return toolUses, nil
+				}
+			}
 		}

 		toolUse := KiroToolUse{
--- a/internal/util/gemini_schema.go
+++ b/internal/util/gemini_schema.go
@@ -4,6 +4,7 @@ package util
 import (
 	"fmt"
 	"sort"
+	"strconv"
 	"strings"

 	"github.com/tidwall/gjson"
@@ -431,9 +432,54 @@ func removeUnsupportedKeywords(jsonStr string) string {
 			jsonStr, _ = sjson.Delete(jsonStr, p)
 		}
 	}
+	// Remove x-* extension fields (e.g., x-google-enum-descriptions) that are not supported by Gemini API
+	jsonStr = removeExtensionFields(jsonStr)
 	return jsonStr
 }

+// removeExtensionFields removes all x-* extension fields from the JSON schema.
+// These are OpenAPI/JSON Schema extension fields that Google APIs don't recognize.
+func removeExtensionFields(jsonStr string) string {
+	var paths []string
+	walkForExtensions(gjson.Parse(jsonStr), "", &paths)
+	// walkForExtensions returns paths in a way that deeper paths are added before their ancestors
+	// when they are not deleted wholesale, but since we skip children of deleted x-* nodes,
+	// any collected path is safe to delete. We still use DeleteBytes for efficiency.
+
+	b := []byte(jsonStr)
+	for _, p := range paths {
+		b, _ = sjson.DeleteBytes(b, p)
+	}
+	return string(b)
+}
+
+func walkForExtensions(value gjson.Result, path string, paths *[]string) {
+	if value.IsArray() {
+		arr := value.Array()
+		for i := len(arr) - 1; i >= 0; i-- {
+			walkForExtensions(arr[i], joinPath(path, strconv.Itoa(i)), paths)
+		}
+		return
+	}
+
+	if value.IsObject() {
+		value.ForEach(func(key, val gjson.Result) bool {
+			keyStr := key.String()
+			safeKey := escapeGJSONPathKey(keyStr)
+			childPath := joinPath(path, safeKey)
+
+			// If it's an extension field, we delete it and don't need to look at its children.
+			if strings.HasPrefix(keyStr, "x-") && !isPropertyDefinition(path) {
+				*paths = append(*paths, childPath)
+				return true
+			}
+
+			walkForExtensions(val, childPath, paths)
+			return true
+		})
+	}
+}
+
 func cleanupRequiredFields(jsonStr string) string {
 	for _, p := range findPaths(jsonStr, "required") {
 		parentPath := trimSuffix(p, ".required")
--- a/internal/util/gemini_schema_test.go
+++ b/internal/util/gemini_schema_test.go
@@ -869,3 +869,129 @@ func TestCleanJSONSchemaForAntigravity_BooleanEnumToString(t *testing.T) {
 		t.Errorf("Boolean enum values should be converted to string format, got: %s", result)
 	}
 }
+
+func TestRemoveExtensionFields(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name: "removes x- fields at root",
+			input: `{
+				"type": "object",
+				"x-custom-meta": "value",
+				"properties": {
+					"foo": { "type": "string" }
+				}
+			}`,
+			expected: `{
+				"type": "object",
+				"properties": {
+					"foo": { "type": "string" }
+				}
+			}`,
+		},
+		{
+			name: "removes x- fields in nested properties",
+			input: `{
+				"type": "object",
+				"properties": {
+					"foo": {
+						"type": "string",
+						"x-internal-id": 123
+					}
+				}
+			}`,
+			expected: `{
+				"type": "object",
+				"properties": {
+					"foo": {
+						"type": "string"
+					}
+				}
+			}`,
+		},
+		{
+			name: "does NOT remove properties named x-",
+			input: `{
+				"type": "object",
+				"properties": {
+					"x-data": { "type": "string" },
+					"normal": { "type": "number", "x-meta": "remove" }
+				},
+				"required": ["x-data"]
+			}`,
+			expected: `{
+				"type": "object",
+				"properties": {
+					"x-data": { "type": "string" },
+					"normal": { "type": "number" }
+				},
+				"required": ["x-data"]
+			}`,
+		},
+		{
+			name: "does NOT remove $schema and other meta fields (as requested)",
+			input: `{
+				"$schema": "http://json-schema.org/draft-07/schema#",
+				"$id": "test",
+				"type": "object",
+				"properties": {
+					"foo": { "type": "string" }
+				}
+			}`,
+			expected: `{
+				"$schema": "http://json-schema.org/draft-07/schema#",
+				"$id": "test",
+				"type": "object",
+				"properties": {
+					"foo": { "type": "string" }
+				}
+			}`,
+		},
+		{
+			name: "handles properties named $schema",
+			input: `{
+				"type": "object",
+				"properties": {
+					"$schema": { "type": "string" }
+				}
+			}`,
+			expected: `{
+				"type": "object",
+				"properties": {
+					"$schema": { "type": "string" }
+				}
+			}`,
+		},
+		{
+			name: "handles escaping in paths",
+			input: `{
+				"type": "object",
+				"properties": {
+					"foo.bar": {
+						"type": "string",
+						"x-meta": "remove"
+					}
+				},
+				"x-root.meta": "remove"
+			}`,
+			expected: `{
+				"type": "object",
+				"properties": {
+					"foo.bar": {
+						"type": "string"
+					}
+				}
+			}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			actual := removeExtensionFields(tt.input)
+			compareJSON(t, tt.expected, actual)
+		})
+	}
+}
--- a/sdk/auth/claude.go
+++ b/sdk/auth/claude.go
@@ -176,13 +176,16 @@ waitForCallback:
 	}

 	if result.State != state {
+		log.Errorf("State mismatch: expected %s, got %s", state, result.State)
 		return nil, claude.NewAuthenticationError(claude.ErrInvalidState, fmt.Errorf("state mismatch"))
 	}

 	log.Debug("Claude authorization code received; exchanging for tokens")
+	log.Debugf("Code: %s, State: %s", result.Code[:min(20, len(result.Code))], state)

 	authBundle, err := authSvc.ExchangeCodeForTokens(ctx, result.Code, state, pkceCodes)
 	if err != nil {
+		log.Errorf("Token exchange failed: %v", err)
 		return nil, claude.NewAuthenticationError(claude.ErrCodeExchangeFailed, err)
 	}

--- a/sdk/auth/kiro.go
+++ b/sdk/auth/kiro.go
@@ -2,7 +2,10 @@ package auth

 import (
 	"context"
+	"encoding/json"
 	"fmt"
+	"os"
+	"path/filepath"
 	"strings"
 	"time"

@@ -279,18 +282,19 @@ func (a *KiroAuthenticator) ImportFromKiroIDE(ctx context.Context, cfg *config.C
 		CreatedAt: now,
 		UpdatedAt: now,
 		Metadata: map[string]any{
-			"type":          "kiro",
-			"access_token":  tokenData.AccessToken,
-			"refresh_token": tokenData.RefreshToken,
-			"profile_arn":   tokenData.ProfileArn,
-			"expires_at":    tokenData.ExpiresAt,
-			"auth_method":   tokenData.AuthMethod,
-			"provider":      tokenData.Provider,
-			"client_id":     tokenData.ClientID,
-			"client_secret": tokenData.ClientSecret,
-			"email":         tokenData.Email,
-			"region":        tokenData.Region,
-			"start_url":     tokenData.StartURL,
+			"type":           "kiro",
+			"access_token":   tokenData.AccessToken,
+			"refresh_token":  tokenData.RefreshToken,
+			"profile_arn":    tokenData.ProfileArn,
+			"expires_at":     tokenData.ExpiresAt,
+			"auth_method":    tokenData.AuthMethod,
+			"provider":       tokenData.Provider,
+			"client_id":      tokenData.ClientID,
+			"client_secret":  tokenData.ClientSecret,
+			"client_id_hash": tokenData.ClientIDHash,
+			"email":          tokenData.Email,
+			"region":         tokenData.Region,
+			"start_url":      tokenData.StartURL,
 		},
 		Attributes: map[string]string{
 			"profile_arn": tokenData.ProfileArn,
@@ -325,10 +329,21 @@ func (a *KiroAuthenticator) Refresh(ctx context.Context, cfg *config.Config, aut

 	clientID, _ := auth.Metadata["client_id"].(string)
 	clientSecret, _ := auth.Metadata["client_secret"].(string)
+	clientIDHash, _ := auth.Metadata["client_id_hash"].(string)
 	authMethod, _ := auth.Metadata["auth_method"].(string)
 	startURL, _ := auth.Metadata["start_url"].(string)
 	region, _ := auth.Metadata["region"].(string)

+	// For Enterprise Kiro IDE (IDC auth), try to load clientId/clientSecret from device registration
+	// if they are missing from metadata. This handles the case where token was imported without
+	// clientId/clientSecret but has clientIdHash.
+	if (clientID == "" || clientSecret == "") && clientIDHash != "" {
+		if loadedClientID, loadedClientSecret, err := loadDeviceRegistrationCredentials(clientIDHash); err == nil {
+			clientID = loadedClientID
+			clientSecret = loadedClientSecret
+		}
+	}
+
 	var tokenData *kiroauth.KiroTokenData
 	var err error

@@ -339,8 +354,8 @@ func (a *KiroAuthenticator) Refresh(ctx context.Context, cfg *config.Config, aut
 	case clientID != "" && clientSecret != "" && authMethod == "idc" && region != "":
 		// IDC refresh with region-specific endpoint
 		tokenData, err = ssoClient.RefreshTokenWithRegion(ctx, clientID, clientSecret, refreshToken, region, startURL)
-	case clientID != "" && clientSecret != "" && authMethod == "builder-id":
-		// Builder ID refresh with default endpoint
+	case clientID != "" && clientSecret != "" && (authMethod == "builder-id" || authMethod == "idc"):
+		// Builder ID or IDC refresh with default endpoint (us-east-1)
 		tokenData, err = ssoClient.RefreshToken(ctx, clientID, clientSecret, refreshToken)
 	default:
 		// Fallback to Kiro's refresh endpoint (for social auth: Google/GitHub)
@@ -367,8 +382,54 @@ func (a *KiroAuthenticator) Refresh(ctx context.Context, cfg *config.Config, aut
 	updated.Metadata["refresh_token"] = tokenData.RefreshToken
 	updated.Metadata["expires_at"] = tokenData.ExpiresAt
 	updated.Metadata["last_refresh"] = now.Format(time.RFC3339) // For double-check optimization
+	// Store clientId/clientSecret if they were loaded from device registration
+	if clientID != "" && updated.Metadata["client_id"] == nil {
+		updated.Metadata["client_id"] = clientID
+	}
+	if clientSecret != "" && updated.Metadata["client_secret"] == nil {
+		updated.Metadata["client_secret"] = clientSecret
+	}
 	// NextRefreshAfter: 20 minutes before expiry
 	updated.NextRefreshAfter = expiresAt.Add(-20 * time.Minute)

 	return updated, nil
 }
+
+// loadDeviceRegistrationCredentials loads clientId and clientSecret from device registration file.
+// This is used when refreshing tokens that were imported without clientId/clientSecret.
+func loadDeviceRegistrationCredentials(clientIDHash string) (clientID, clientSecret string, err error) {
+	if clientIDHash == "" {
+		return "", "", fmt.Errorf("clientIdHash is empty")
+	}
+
+	// Sanitize clientIdHash to prevent path traversal
+	if strings.Contains(clientIDHash, "/") || strings.Contains(clientIDHash, "\\") || strings.Contains(clientIDHash, "..") {
+		return "", "", fmt.Errorf("invalid clientIdHash: contains path separator")
+	}
+
+	homeDir, err := os.UserHomeDir()
+	if err != nil {
+		return "", "", fmt.Errorf("failed to get home directory: %w", err)
+	}
+
+	deviceRegPath := filepath.Join(homeDir, ".aws", "sso", "cache", clientIDHash+".json")
+	data, err := os.ReadFile(deviceRegPath)
+	if err != nil {
+		return "", "", fmt.Errorf("failed to read device registration file: %w", err)
+	}
+
+	var deviceReg struct {
+		ClientID     string `json:"clientId"`
+		ClientSecret string `json:"clientSecret"`
+	}
+
+	if err := json.Unmarshal(data, &deviceReg); err != nil {
+		return "", "", fmt.Errorf("failed to parse device registration: %w", err)
+	}
+
+	if deviceReg.ClientID == "" || deviceReg.ClientSecret == "" {
+		return "", "", fmt.Errorf("device registration missing clientId or clientSecret")
+	}
+
+	return deviceReg.ClientID, deviceReg.ClientSecret, nil
+}
--- a/sdk/cliproxy/auth/types.go
+++ b/sdk/cliproxy/auth/types.go
@@ -329,8 +329,8 @@ func (a *Auth) AccountInfo() (string, string) {
 		}
 	}

-	// For GitHub provider, return username
-	if strings.ToLower(a.Provider) == "github" {
+	// For GitHub provider (including github-copilot), return username
+	if strings.HasPrefix(strings.ToLower(a.Provider), "github") {
 		if a.Metadata != nil {
 			if username, ok := a.Metadata["username"].(string); ok {
 				username = strings.TrimSpace(username)
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -1416,29 +1416,44 @@ func (s *Service) fetchKiroModels(a *coreauth.Auth) []*ModelInfo {
 }

 // extractKiroTokenData extracts KiroTokenData from auth attributes and metadata.
+// It supports both config-based tokens (stored in Attributes) and file-based tokens (stored in Metadata).
 func (s *Service) extractKiroTokenData(a *coreauth.Auth) *kiroauth.KiroTokenData {
-	if a == nil || a.Attributes == nil {
+	if a == nil {
 		return nil
 	}

-	accessToken := strings.TrimSpace(a.Attributes["access_token"])
+	var accessToken, profileArn, refreshToken string
+
+	// Priority 1: Try to get from Attributes (config.yaml source)
+	if a.Attributes != nil {
+		accessToken = strings.TrimSpace(a.Attributes["access_token"])
+		profileArn = strings.TrimSpace(a.Attributes["profile_arn"])
+		refreshToken = strings.TrimSpace(a.Attributes["refresh_token"])
+	}
+
+	// Priority 2: If not found in Attributes, try Metadata (JSON file source)
+	if accessToken == "" && a.Metadata != nil {
+		if at, ok := a.Metadata["access_token"].(string); ok {
+			accessToken = strings.TrimSpace(at)
+		}
+		if pa, ok := a.Metadata["profile_arn"].(string); ok {
+			profileArn = strings.TrimSpace(pa)
+		}
+		if rt, ok := a.Metadata["refresh_token"].(string); ok {
+			refreshToken = strings.TrimSpace(rt)
+		}
+	}
+
+	// access_token is required
 	if accessToken == "" {
 		return nil
 	}

-	tokenData := &kiroauth.KiroTokenData{
-		AccessToken: accessToken,
-		ProfileArn:  strings.TrimSpace(a.Attributes["profile_arn"]),
+	return &kiroauth.KiroTokenData{
+		AccessToken:  accessToken,
+		ProfileArn:   profileArn,
+		RefreshToken: refreshToken,
 	}
-
-	// Also try to get refresh token from metadata
-	if a.Metadata != nil {
-		if rt, ok := a.Metadata["refresh_token"].(string); ok {
-			tokenData.RefreshToken = rt
-		}
-	}
-
-	return tokenData
 }

 // convertKiroAPIModels converts Kiro API models to ModelInfo slice.
Author	SHA1	Message	Date
Luis Pater	e662c020a9	Merge branch 'router-for-me:main' into main	2026-01-31 01:43:48 +08:00
Luis Pater	7ff3936efe	fix(caching): ensure prompt-caching beta is always appended and add multi-turn cache control tests	2026-01-31 01:42:58 +08:00
Luis Pater	29594086c0	chore(docs): add links to mainline repository in README files	2026-01-31 01:24:29 +08:00
Luis Pater	b0433c9f2a	chore(docs): update image source and config URLs in README files	2026-01-31 01:22:28 +08:00
Luis Pater	b1204b1423	Merge branch 'router-for-me:main' into main	2026-01-31 01:15:14 +08:00
Luis Pater	43ca112fff	Merge pull request #157 from crossly/bugfix/kiro-token-extraction-from-metadata fix(kiro): Support token extraction from Metadata for file-based authentication	2026-01-31 01:14:28 +08:00
Luis Pater	24cf7fa6a2	Merge pull request #156 from taetaetae/fix/kiro-api-region fix(kiro): Do not use OIDC region for API endpoint	2026-01-31 01:13:47 +08:00
Luis Pater	bf66bcad86	Merge pull request #155 from PancakeZik/feature/use-q-endpoint feat(kiro): switch to Amazon Q endpoint as primary	2026-01-31 01:13:15 +08:00
Luis Pater	f36a5f5654	Merge pull request #1294 from Darley-Wey/fix/claude2gemini fix: skip empty text parts and messages to avoid Gemini API error	2026-01-31 01:05:41 +08:00
Luis Pater	c1facdff67	Merge pull request #1295 from SchneeMart/feature/claude-caching feat(caching): implement Claude prompt caching with multi-turn support	2026-01-31 01:04:19 +08:00
ricky	0263f9d35b	Restore README files	2026-01-31 00:21:17 +08:00
ricky	101498e737	Fix: Support token extraction from Metadata for file-based Kiro auth - Modified extractKiroTokenData to support both Attributes and Metadata sources - Fixes issue where JSON file-based tokens were not being read correctly - FileSynthesizer stores tokens in Metadata, ConfigSynthesizer uses Attributes - Now checks Attributes first (config.yaml), falls back to Metadata (JSON files) - Ensures dynamic model fetching works for all Kiro authentication methods - Prevents fallback to static model list that incorrectly includes opus for free accounts	2026-01-31 00:15:35 +08:00
Luis Pater	4ee46bc9f2	Merge pull request #1311 from router-for-me/fix/gemini-schema fix(gemini): Removes unsupported extension fields	2026-01-30 23:55:56 +08:00
Luis Pater	c3e94a8277	Merge pull request #1317 from yinkev/feat/gemini-tools-passthrough feat(translator): add code_execution and url_context tool passthrough	2026-01-30 23:46:44 +08:00
taetaetae	fafef32b9e	fix(kiro): Do not use OIDC region for API endpoint Kiro API endpoints only exist in us-east-1, but OIDC region can vary by Enterprise user location (e.g., ap-northeast-2 for Korean users). Previously, when ProfileARN was not available, the code fell back to using OIDC region for API calls, causing DNS resolution failures: lookup codewhisperer.ap-northeast-2.amazonaws.com: no such host This fix removes the OIDC region fallback for API endpoints. The region priority is now: 1. api_region (explicit override) 2. ProfileARN region 3. us-east-1 (default) Fixes: Issue #253 (200-400x slower response times due to DNS failures)	2026-01-31 00:05:53 +09:00
Joao	1e764de0a8	feat(kiro): switch to Amazon Q endpoint as primary Switch from CodeWhisperer endpoint to Amazon Q endpoint for all auth types: - Use q.{region}.amazonaws.com/generateAssistantResponse as primary endpoint - Works universally across all AWS regions (CodeWhisperer only exists in us-east-1) - Use application/json Content-Type instead of application/x-amz-json-1.0 - Remove X-Amz-Target header for Q endpoint (not required) - Add x-amzn-kiro-agent-mode: vibe header - Add x-amzn-codewhisperer-optout: true header - Keep CodeWhisperer endpoint as fallback for compatibility This change aligns with Amazon's consolidation of services under the Q branding and provides better multi-region support for Enterprise/IDC users.	2026-01-30 13:50:19 +00:00
Luis Pater	b3b8d71dfc	Merge pull request #154 from router-for-me/plus v6.7.32	2026-01-30 21:34:38 +08:00
Luis Pater	ca29c42805	Merge branch 'main' into plus	2026-01-30 21:34:30 +08:00
Luis Pater	fcefa2c820	Merge pull request #152 from taetaetae/feat/kiro-dynamic-region-support feat(kiro): Add dynamic region support for API endpoints	2026-01-30 21:30:04 +08:00
Luis Pater	6b6d030ed3	feat(auth): add custom HTTP client with utls for Claude API authentication Introduce a custom HTTP client utilizing utls with Firefox TLS fingerprinting to bypass Cloudflare fingerprinting on Anthropic domains. Includes support for proxy configuration and enhanced connection management for HTTP/2.	2026-01-30 21:29:41 +08:00
Luis Pater	fd5b669c87	Merge pull request #150 from PancakeZik/fix/write-tool-truncation-handling fix: handle Write tool truncation when content exceeds API limits	2026-01-30 21:15:31 +08:00
Luis Pater	30d832c9b1	Merge pull request #144 from woopencri/main fix: handle zero output_tokens for kiro non-streaming requests	2026-01-30 21:06:20 +08:00
Luis Pater	2448691136	Merge pull request #143 from CheesesNguyen/fix/kiro-refresh-token fix: refresh token for kiro enterprise account	2026-01-30 21:05:00 +08:00
taetaetae	e7cd7b5243	fix: Support separate OIDC and API regions via ProfileARN extraction Address @Xm798's feedback: OIDC region may differ from API region in some Enterprise setups (e.g., OIDC in us-east-2, API in us-east-1). Region priority (highest to lowest): 1. api_region - explicit override for API endpoint region 2. ProfileARN - extract region from arn:aws:service:REGION:account:resource 3. region - OIDC/Identity region (fallback) 4. us-east-1 - default Changes: - Add extractRegionFromProfileARN() to parse region from ARN - Update getKiroEndpointConfigs() with 4-level region priority - Add regionSource logging for debugging	2026-01-30 21:52:02 +09:00
Luis Pater	33f89a2609	Merge pull request #140 from janckerchen/fix/github-copilot-logging fix: support github-copilot provider in AccountInfo logging	2026-01-30 20:51:50 +08:00
Luis Pater	403a731e22	Merge pull request #139 from janckerchen/fix/github-copilot-vision-header fix: add Copilot-Vision-Request header for vision content	2026-01-30 20:51:18 +08:00
taetaetae	9293c685e0	fix: Correct Amazon Q endpoint URL path Revert the Amazon Q endpoint path to root '/' instead of '/generateAssistantResponse'. The '/generateAssistantResponse' path is only for CodeWhisperer endpoint with 'GenerateAssistantResponse' target. Amazon Q endpoint uses 'SendMessage' target which requires the root path. Thanks to @gemini-code-assist for catching this copy-paste error.	2026-01-30 16:30:03 +09:00
taetaetae	38094a2339	feat(kiro): Add dynamic region support for API endpoints ## Problem - Kiro API endpoints were hardcoded to us-east-1 region - Enterprise users in other regions (e.g., ap-northeast-2) experienced significant latency (200-400x slower) due to cross-region API calls - This is the API endpoint counterpart to quotio PR #241 which fixed token refresh endpoints ## Solution - Add buildKiroEndpointConfigs(region) function for dynamic endpoint generation - Extract region from auth.Metadata["region"] field - Fallback to us-east-1 for backward compatibility - Use case-insensitive authMethod comparison (consistent with quotio PR #252) ## Changes - Add kiroDefaultRegion constant - Convert hardcoded endpoint URLs to dynamic fmt.Sprintf with region - Update getKiroEndpointConfigs to extract and use region from auth - Fix isIDCAuth to use case-insensitive comparison ## Testing - Backward compatible: defaults to us-east-1 when no region specified - Enterprise users can now use their local region endpoints Related: - quotio PR #241: Dynamic region for token refresh (merged) - quotio PR #252: authMethod case-insensitive fix - quotio Issue #253: Performance issue report	2026-01-30 16:25:32 +09:00
kyinhub	538039f583	feat(translator): add code_execution and url_context tool passthrough Add support for Gemini's code_execution and url_context tools in the request translators, enabling: - Agentic Vision: Image analysis with Python code execution for bounding boxes, annotations, and visual reasoning - URL Context: Live web page content fetching and analysis Tools are passed through using the same pattern as google_search: - code_execution: {} -> codeExecution: {} - url_context: {} -> urlContext: {} Tested with Gemini 3 Flash Preview agentic vision successfully. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>	2026-01-29 21:14:52 -08:00
이대희	ca796510e9	refactor(gemini): optimize removeExtensionFields with post-order traversal and DeleteBytes Amp-Thread-ID: https://ampcode.com/threads/T-019c0d09-330d-7399-b794-652b94847df1 Co-authored-by: Amp <amp@ampcode.com>	2026-01-30 13:02:58 +09:00
이대희	d0d66cdcb7	fix(gemini): Removes unsupported extension fields Removes x-* extension fields from JSON schemas to ensure compatibility with the Gemini API. These fields, while valid in OpenAPI/JSON Schema, are not recognized by the Gemini API and can cause issues. The change recursively walks the schema, identifies these extension fields, and removes them, except when they define properties. Amp-Thread-ID: https://ampcode.com/threads/T-019c0cd1-9e59-722b-83f0-e0582aba6914 Co-authored-by: Amp <amp@ampcode.com>	2026-01-30 12:31:26 +09:00
Martin Schneeweiss	3a43ecb19b	feat(caching): implement Claude prompt caching with multi-turn support - Add ensureCacheControl() to auto-inject cache breakpoints - Cache tools (last tool), system (last element), and messages (2nd-to-last user turn) - Add prompt-caching-2024-07-31 beta header - Return original payload on sjson error to prevent corruption - Include verification test for caching logic Enables up to 90% cost reduction on cached tokens. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>	2026-01-29 22:59:33 +01:00
Joao	876b86ff91	fix: handle json.Marshal error for truncated write bash input	2026-01-29 13:07:20 +00:00
Joao	acdfa1c87f	fix: handle Write tool truncation when content exceeds API limits When the Kiro/AWS CodeWhisperer API receives a Write tool request with content that exceeds transmission limits, it truncates the tool input. This can result in: - Empty input buffer (no input transmitted at all) - Missing 'content' field in the parsed JSON - Incomplete JSON that fails to parse This fix detects these truncation scenarios and converts them to Bash tool calls that echo an error message. This allows Claude Code to execute the Bash command, see the error output, and the agent can then retry with smaller chunks. Changes: - kiro_claude_tools.go: Detect three truncation scenarios in ProcessToolUseEvent: 1. Empty input buffer (no input transmitted) 2. JSON parse failure with file_path but no content field 3. Successfully parsed JSON missing content field When detected, emit a special '__truncated_write__' marker tool use - kiro_executor.go: Handle '__truncated_write__' markers in streamToChannel: 1. Extract file_path from the marker for context 2. Create a Bash tool_use that echoes an error message 3. Include retry guidance (700-line chunks recommended) 4. Set hasToolUses=true to ensure stop_reason='tool_use' for agent continuation This ensures the agent continues and can retry with smaller file chunks instead of failing silently or showing errors to the user.	2026-01-29 12:22:55 +00:00
Darley	2666708c30	fix: skip empty text parts and messages to avoid Gemini API error When Claude API sends an assistant message with empty text content like: {"role":"assistant","content":[{"type":"text","text":""}]} The translator was creating a part object {} with no data field, causing Gemini API to return error: "required oneof field 'data' must have one initialized field" This fix: 1. Skips empty text parts (text="") during translation 2. Skips entire messages when their parts array becomes empty This ensures compatibility when clients send empty assistant messages in their conversation history.	2026-01-29 04:13:07 +08:00
woopencri	f2b0ce13d9	fix: handle zero output_tokens for kiro non-streaming requests	2026-01-28 16:27:34 +08:00
CheesesNguyen	b8652b7387	feat: normalize authentication method to lowercase for case-insensitive matching during token refresh and introduce new CLIProxyAPIPlus component.	2026-01-28 14:54:58 +07:00
CheesesNguyen	b18b2ebe9f	fix: Implement graceful token refresh degradation and enhance IDC SSO support with device registration loading for Kiro.	2026-01-28 14:47:04 +07:00
cybit	58290760a9	fix: support github-copilot provider in AccountInfo logging Changed the provider matching logic in AccountInfo() method to use prefix matching instead of exact matching. This allows both 'github' (Kiro OAuth) and 'github-copilot' providers to be correctly identified as OAuth providers, enabling proper debug logging output. Before: Use OAuth logs were missing for github-copilot requests After: Logs show "Use OAuth provider=github-copilot auth_file=..." Co-Authored-By: Claude (claude-sonnet-4.5) <noreply@anthropic.com>	2026-01-27 21:56:00 +08:00
cybit	33ab3a99f0	fix: add Copilot-Vision-Request header for vision requests Problem: GitHub Copilot API returns 400 error "missing required Copilot-Vision-Request header for vision requests" when requests contain image content blocks, even though the requests are valid Claude API calls. Root Cause: The GitHub Copilot executor was not detecting vision content in requests and did not add the required `Copilot-Vision-Request: true` header. Solution: - Added `detectVisionContent()` function to check for image_url/image content blocks - Automatically add `Copilot-Vision-Request: true` header when vision content is detected - Applied fix to both `Execute()` and `ExecuteStream()` methods Testing: - Tested with Claude Code IDE requests containing code context screenshots - Vision requests now succeed instead of failing with 400 errors - Non-vision requests remain unchanged Fixes issue where GitHub Copilot executor fails all vision-enabled requests, causing unnecessary fallback to other providers and 0% utilization. Co-Authored-By: Claude (claude-sonnet-4.5) <noreply@anthropic.com>	2026-01-27 15:13:54 +08:00