Merge pull request #65 from router-for-me/plus

v6.6.52
Merge branch 'main' into plus
2026-03-09 23:33:24 +00:00 · 2025-12-24 22:28:45 +08:00 · 2025-12-24 22:27:12 +08:00 · 2025-12-24 22:24:39 +08:00 · 2025-12-24 22:03:07 +08:00 · 2025-12-24 21:58:55 +08:00
40 changed files with 2495 additions and 468 deletions
--- a/assets/cubence.png
+++ b/assets/cubence.png
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -78,6 +78,11 @@ routing:
 # When true, enable authentication for the WebSocket API (/v1/ws).
 ws-auth: false

+# Streaming behavior (SSE keep-alives + safe bootstrap retries).
+# streaming:
+#   keepalive-seconds: 15   # Default: 0 (disabled). <= 0 disables keep-alives.
+#   bootstrap-retries: 1    # Default: 0 (disabled). Retries before first byte is sent.
+
 # Gemini API keys
 # gemini-api-key:
 #   - api-key: "AIzaSy...01"
--- a/internal/api/handlers/management/logs.go
+++ b/internal/api/handlers/management/logs.go
@@ -209,6 +209,94 @@ func (h *Handler) GetRequestErrorLogs(c *gin.Context) {
 	c.JSON(http.StatusOK, gin.H{"files": files})
 }

+// GetRequestLogByID finds and downloads a request log file by its request ID.
+// The ID is matched against the suffix of log file names (format: *-{requestID}.log).
+func (h *Handler) GetRequestLogByID(c *gin.Context) {
+	if h == nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "handler unavailable"})
+		return
+	}
+	if h.cfg == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "configuration unavailable"})
+		return
+	}
+
+	dir := h.logDirectory()
+	if strings.TrimSpace(dir) == "" {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "log directory not configured"})
+		return
+	}
+
+	requestID := strings.TrimSpace(c.Param("id"))
+	if requestID == "" {
+		requestID = strings.TrimSpace(c.Query("id"))
+	}
+	if requestID == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "missing request ID"})
+		return
+	}
+	if strings.ContainsAny(requestID, "/\\") {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request ID"})
+		return
+	}
+
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		if os.IsNotExist(err) {
+			c.JSON(http.StatusNotFound, gin.H{"error": "log directory not found"})
+			return
+		}
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to list log directory: %v", err)})
+		return
+	}
+
+	suffix := "-" + requestID + ".log"
+	var matchedFile string
+	for _, entry := range entries {
+		if entry.IsDir() {
+			continue
+		}
+		name := entry.Name()
+		if strings.HasSuffix(name, suffix) {
+			matchedFile = name
+			break
+		}
+	}
+
+	if matchedFile == "" {
+		c.JSON(http.StatusNotFound, gin.H{"error": "log file not found for the given request ID"})
+		return
+	}
+
+	dirAbs, errAbs := filepath.Abs(dir)
+	if errAbs != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to resolve log directory: %v", errAbs)})
+		return
+	}
+	fullPath := filepath.Clean(filepath.Join(dirAbs, matchedFile))
+	prefix := dirAbs + string(os.PathSeparator)
+	if !strings.HasPrefix(fullPath, prefix) {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid log file path"})
+		return
+	}
+
+	info, errStat := os.Stat(fullPath)
+	if errStat != nil {
+		if os.IsNotExist(errStat) {
+			c.JSON(http.StatusNotFound, gin.H{"error": "log file not found"})
+			return
+		}
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to read log file: %v", errStat)})
+		return
+	}
+	if info.IsDir() {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid log file"})
+		return
+	}
+
+	c.FileAttachment(fullPath, matchedFile)
+}
+
 // DownloadRequestErrorLog downloads a specific error request log file by name.
 func (h *Handler) DownloadRequestErrorLog(c *gin.Context) {
 	if h == nil {
--- a/internal/api/middleware/request_logging.go
+++ b/internal/api/middleware/request_logging.go
@@ -98,10 +98,11 @@ func captureRequestInfo(c *gin.Context) (*RequestInfo, error) {
 	}

 	return &RequestInfo{
-		URL:     url,
-		Method:  method,
-		Headers: headers,
-		Body:    body,
+		URL:       url,
+		Method:    method,
+		Headers:   headers,
+		Body:      body,
+		RequestID: logging.GetGinRequestID(c),
 	}, nil
 }

--- a/internal/api/middleware/response_writer.go
+++ b/internal/api/middleware/response_writer.go
@@ -15,10 +15,11 @@ import (

 // RequestInfo holds essential details of an incoming HTTP request for logging purposes.
 type RequestInfo struct {
-	URL     string              // URL is the request URL.
-	Method  string              // Method is the HTTP method (e.g., GET, POST).
-	Headers map[string][]string // Headers contains the request headers.
-	Body    []byte              // Body is the raw request body.
+	URL       string              // URL is the request URL.
+	Method    string              // Method is the HTTP method (e.g., GET, POST).
+	Headers   map[string][]string // Headers contains the request headers.
+	Body      []byte              // Body is the raw request body.
+	RequestID string              // RequestID is the unique identifier for the request.
 }

 // ResponseWriterWrapper wraps the standard gin.ResponseWriter to intercept and log response data.
@@ -149,6 +150,7 @@ func (w *ResponseWriterWrapper) WriteHeader(statusCode int) {
 			w.requestInfo.Method,
 			w.requestInfo.Headers,
 			w.requestInfo.Body,
+			w.requestInfo.RequestID,
 		)
 		if err == nil {
 			w.streamWriter = streamWriter
@@ -346,7 +348,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
 	}

 	if loggerWithOptions, ok := w.logger.(interface {
-		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool) error
+		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string) error
 	}); ok {
 		return loggerWithOptions.LogRequestWithOptions(
 			w.requestInfo.URL,
@@ -360,6 +362,7 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
 			apiResponseBody,
 			apiResponseErrors,
 			forceLog,
+			w.requestInfo.RequestID,
 		)
 	}

@@ -374,5 +377,6 @@ func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]
 		apiRequestBody,
 		apiResponseBody,
 		apiResponseErrors,
+		w.requestInfo.RequestID,
 	)
 }
--- a/internal/api/modules/amp/amp.go
+++ b/internal/api/modules/amp/amp.go
@@ -279,16 +279,23 @@ func (m *AmpModule) hasModelMappingsChanged(old *config.AmpCode, new *config.Amp
 		return true
 	}

-	// Build map for efficient comparison
-	oldMap := make(map[string]string, len(old.ModelMappings))
+	// Build map for efficient and robust comparison
+	type mappingInfo struct {
+		to    string
+		regex bool
+	}
+	oldMap := make(map[string]mappingInfo, len(old.ModelMappings))
 	for _, mapping := range old.ModelMappings {
-		oldMap[strings.TrimSpace(mapping.From)] = strings.TrimSpace(mapping.To)
+		oldMap[strings.TrimSpace(mapping.From)] = mappingInfo{
+			to:    strings.TrimSpace(mapping.To),
+			regex: mapping.Regex,
+		}
 	}

 	for _, mapping := range new.ModelMappings {
 		from := strings.TrimSpace(mapping.From)
 		to := strings.TrimSpace(mapping.To)
-		if oldTo, exists := oldMap[from]; !exists || oldTo != to {
+		if oldVal, exists := oldMap[from]; !exists || oldVal.to != to || oldVal.regex != mapping.Regex {
 			return true
 		}
 	}
--- a/internal/api/modules/amp/model_mapping.go
+++ b/internal/api/modules/amp/model_mapping.go
@@ -3,6 +3,7 @@
 package amp

 import (
+	"regexp"
 	"strings"
 	"sync"

@@ -26,13 +27,15 @@ type ModelMapper interface {
 // DefaultModelMapper implements ModelMapper with thread-safe mapping storage.
 type DefaultModelMapper struct {
 	mu       sync.RWMutex
-	mappings map[string]string // from -> to (normalized lowercase keys)
+	mappings map[string]string // exact: from -> to (normalized lowercase keys)
+	regexps  []regexMapping    // regex rules evaluated in order
 }

 // NewModelMapper creates a new model mapper with the given initial mappings.
 func NewModelMapper(mappings []config.AmpModelMapping) *DefaultModelMapper {
 	m := &DefaultModelMapper{
 		mappings: make(map[string]string),
+		regexps:  nil,
 	}
 	m.UpdateMappings(mappings)
 	return m
@@ -55,7 +58,18 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 	// Check for direct mapping
 	targetModel, exists := m.mappings[normalizedRequest]
 	if !exists {
-		return ""
+		// Try regex mappings in order
+		base, _ := util.NormalizeThinkingModel(requestedModel)
+		for _, rm := range m.regexps {
+			if rm.re.MatchString(requestedModel) || (base != "" && rm.re.MatchString(base)) {
+				targetModel = rm.to
+				exists = true
+				break
+			}
+		}
+		if !exists {
+			return ""
+		}
 	}

 	// Verify target model has available providers
@@ -78,6 +92,7 @@ func (m *DefaultModelMapper) UpdateMappings(mappings []config.AmpModelMapping) {

 	// Clear and rebuild mappings
 	m.mappings = make(map[string]string, len(mappings))
+	m.regexps = make([]regexMapping, 0, len(mappings))

 	for _, mapping := range mappings {
 		from := strings.TrimSpace(mapping.From)
@@ -88,16 +103,30 @@ func (m *DefaultModelMapper) UpdateMappings(mappings []config.AmpModelMapping) {
 			continue
 		}

-		// Store with normalized lowercase key for case-insensitive lookup
-		normalizedFrom := strings.ToLower(from)
-		m.mappings[normalizedFrom] = to
-
-		log.Debugf("amp model mapping registered: %s -> %s", from, to)
+		if mapping.Regex {
+			// Compile case-insensitive regex; wrap with (?i) to match behavior of exact lookups
+			pattern := "(?i)" + from
+			re, err := regexp.Compile(pattern)
+			if err != nil {
+				log.Warnf("amp model mapping: invalid regex %q: %v", from, err)
+				continue
+			}
+			m.regexps = append(m.regexps, regexMapping{re: re, to: to})
+			log.Debugf("amp model regex mapping registered: /%s/ -> %s", from, to)
+		} else {
+			// Store with normalized lowercase key for case-insensitive lookup
+			normalizedFrom := strings.ToLower(from)
+			m.mappings[normalizedFrom] = to
+			log.Debugf("amp model mapping registered: %s -> %s", from, to)
+		}
 	}

 	if len(m.mappings) > 0 {
 		log.Infof("amp model mapping: loaded %d mapping(s)", len(m.mappings))
 	}
+	if n := len(m.regexps); n > 0 {
+		log.Infof("amp model mapping: loaded %d regex mapping(s)", n)
+	}
 }

 // GetMappings returns a copy of current mappings (for debugging/status).
@@ -111,3 +140,8 @@ func (m *DefaultModelMapper) GetMappings() map[string]string {
 	}
 	return result
 }
+
+type regexMapping struct {
+	re *regexp.Regexp
+	to string
+}
--- a/internal/api/modules/amp/model_mapping_test.go
+++ b/internal/api/modules/amp/model_mapping_test.go
@@ -203,3 +203,81 @@ func TestModelMapper_GetMappings_ReturnsCopy(t *testing.T) {
 		t.Error("Original map was modified")
 	}
 }
+
+func TestModelMapper_Regex_MatchBaseWithoutParens(t *testing.T) {
+	reg := registry.GetGlobalRegistry()
+	reg.RegisterClient("test-client-regex-1", "gemini", []*registry.ModelInfo{
+		{ID: "gemini-2.5-pro", OwnedBy: "google", Type: "gemini"},
+	})
+	defer reg.UnregisterClient("test-client-regex-1")
+
+	mappings := []config.AmpModelMapping{
+		{From: "^gpt-5$", To: "gemini-2.5-pro", Regex: true},
+	}
+
+	mapper := NewModelMapper(mappings)
+
+	// Incoming model has reasoning suffix but should match base via regex
+	result := mapper.MapModel("gpt-5(high)")
+	if result != "gemini-2.5-pro" {
+		t.Errorf("Expected gemini-2.5-pro, got %s", result)
+	}
+}
+
+func TestModelMapper_Regex_ExactPrecedence(t *testing.T) {
+	reg := registry.GetGlobalRegistry()
+	reg.RegisterClient("test-client-regex-2", "claude", []*registry.ModelInfo{
+		{ID: "claude-sonnet-4", OwnedBy: "anthropic", Type: "claude"},
+	})
+	reg.RegisterClient("test-client-regex-3", "gemini", []*registry.ModelInfo{
+		{ID: "gemini-2.5-pro", OwnedBy: "google", Type: "gemini"},
+	})
+	defer reg.UnregisterClient("test-client-regex-2")
+	defer reg.UnregisterClient("test-client-regex-3")
+
+	mappings := []config.AmpModelMapping{
+		{From: "gpt-5", To: "claude-sonnet-4"},                 // exact
+		{From: "^gpt-5.*$", To: "gemini-2.5-pro", Regex: true}, // regex
+	}
+
+	mapper := NewModelMapper(mappings)
+
+	// Exact match should win over regex
+	result := mapper.MapModel("gpt-5")
+	if result != "claude-sonnet-4" {
+		t.Errorf("Expected claude-sonnet-4, got %s", result)
+	}
+}
+
+func TestModelMapper_Regex_InvalidPattern_Skipped(t *testing.T) {
+	// Invalid regex should be skipped and not cause panic
+	mappings := []config.AmpModelMapping{
+		{From: "(", To: "target", Regex: true},
+	}
+
+	mapper := NewModelMapper(mappings)
+
+	result := mapper.MapModel("anything")
+	if result != "" {
+		t.Errorf("Expected empty result due to invalid regex, got %s", result)
+	}
+}
+
+func TestModelMapper_Regex_CaseInsensitive(t *testing.T) {
+	reg := registry.GetGlobalRegistry()
+	reg.RegisterClient("test-client-regex-4", "claude", []*registry.ModelInfo{
+		{ID: "claude-sonnet-4", OwnedBy: "anthropic", Type: "claude"},
+	})
+	defer reg.UnregisterClient("test-client-regex-4")
+
+	mappings := []config.AmpModelMapping{
+		{From: "^CLAUDE-OPUS-.*$", To: "claude-sonnet-4", Regex: true},
+	}
+
+	mapper := NewModelMapper(mappings)
+
+	result := mapper.MapModel("claude-opus-4.5")
+	if result != "claude-sonnet-4" {
+		t.Errorf("Expected claude-sonnet-4, got %s", result)
+	}
+}
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -538,6 +538,7 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.DELETE("/logs", s.mgmt.DeleteLogs)
 		mgmt.GET("/request-error-logs", s.mgmt.GetRequestErrorLogs)
 		mgmt.GET("/request-error-logs/:name", s.mgmt.DownloadRequestErrorLog)
+		mgmt.GET("/request-log-by-id/:id", s.mgmt.GetRequestLogByID)
 		mgmt.GET("/request-log", s.mgmt.GetRequestLog)
 		mgmt.PUT("/request-log", s.mgmt.PutRequestLog)
 		mgmt.PATCH("/request-log", s.mgmt.PutRequestLog)
--- a/internal/auth/kiro/aws.go
+++ b/internal/auth/kiro/aws.go
@@ -40,6 +40,10 @@ type KiroTokenData struct {
 	ClientSecret string `json:"clientSecret,omitempty"`
 	// Email is the user's email address (used for file naming)
 	Email string `json:"email,omitempty"`
+	// StartURL is the IDC/Identity Center start URL (only for IDC auth method)
+	StartURL string `json:"startUrl,omitempty"`
+	// Region is the AWS region for IDC authentication (only for IDC auth method)
+	Region string `json:"region,omitempty"`
 }

 // KiroAuthBundle aggregates authentication data after OAuth flow completion
--- a/internal/auth/kiro/sso_oidc.go
+++ b/internal/auth/kiro/sso_oidc.go
@@ -2,16 +2,19 @@
 package kiro

 import (
+	"bufio"
 	"context"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/base64"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"html"
 	"io"
 	"net"
 	"net/http"
+	"os"
 	"strings"
 	"time"

@@ -24,19 +27,31 @@ import (
 const (
 	// AWS SSO OIDC endpoints
 	ssoOIDCEndpoint = "https://oidc.us-east-1.amazonaws.com"
-	
+
 	// Kiro's start URL for Builder ID
 	builderIDStartURL = "https://view.awsapps.com/start"
-	
+
+	// Default region for IDC
+	defaultIDCRegion = "us-east-1"
+
 	// Polling interval
 	pollInterval = 5 * time.Second
-	
+
 	// Authorization code flow callback
 	authCodeCallbackPath = "/oauth/callback"
 	authCodeCallbackPort = 19877
-	
+
 	// User-Agent to match official Kiro IDE
 	kiroUserAgent = "KiroIDE"
+
+	// IDC token refresh headers (matching Kiro IDE behavior)
+	idcAmzUserAgent = "aws-sdk-js/3.738.0 ua/2.1 os/other lang/js md/browser#unknown_unknown api/sso-oidc#3.738.0 m/E KiroIDE"
+)
+
+// Sentinel errors for OIDC token polling
+var (
+	ErrAuthorizationPending = errors.New("authorization_pending")
+	ErrSlowDown             = errors.New("slow_down")
 )

 // SSOOIDCClient handles AWS SSO OIDC authentication.
@@ -83,6 +98,440 @@ type CreateTokenResponse struct {
 	RefreshToken string `json:"refreshToken"`
 }

+// getOIDCEndpoint returns the OIDC endpoint for the given region.
+func getOIDCEndpoint(region string) string {
+	if region == "" {
+		region = defaultIDCRegion
+	}
+	return fmt.Sprintf("https://oidc.%s.amazonaws.com", region)
+}
+
+// promptInput prompts the user for input with an optional default value.
+func promptInput(prompt, defaultValue string) string {
+	reader := bufio.NewReader(os.Stdin)
+	if defaultValue != "" {
+		fmt.Printf("%s [%s]: ", prompt, defaultValue)
+	} else {
+		fmt.Printf("%s: ", prompt)
+	}
+	input, err := reader.ReadString('\n')
+	if err != nil {
+		log.Warnf("Error reading input: %v", err)
+		return defaultValue
+	}
+	input = strings.TrimSpace(input)
+	if input == "" {
+		return defaultValue
+	}
+	return input
+}
+
+// promptSelect prompts the user to select from options using number input.
+func promptSelect(prompt string, options []string) int {
+	reader := bufio.NewReader(os.Stdin)
+
+	for {
+		fmt.Println(prompt)
+		for i, opt := range options {
+			fmt.Printf("  %d) %s\n", i+1, opt)
+		}
+		fmt.Printf("Enter selection (1-%d): ", len(options))
+
+		input, err := reader.ReadString('\n')
+		if err != nil {
+			log.Warnf("Error reading input: %v", err)
+			return 0 // Default to first option on error
+		}
+		input = strings.TrimSpace(input)
+
+		// Parse the selection
+		var selection int
+		if _, err := fmt.Sscanf(input, "%d", &selection); err != nil || selection < 1 || selection > len(options) {
+			fmt.Printf("Invalid selection '%s'. Please enter a number between 1 and %d.\n\n", input, len(options))
+			continue
+		}
+		return selection - 1
+	}
+}
+
+// RegisterClientWithRegion registers a new OIDC client with AWS using a specific region.
+func (c *SSOOIDCClient) RegisterClientWithRegion(ctx context.Context, region string) (*RegisterClientResponse, error) {
+	endpoint := getOIDCEndpoint(region)
+
+	payload := map[string]interface{}{
+		"clientName": "Kiro IDE",
+		"clientType": "public",
+		"scopes":     []string{"codewhisperer:completions", "codewhisperer:analysis", "codewhisperer:conversations", "codewhisperer:transformations", "codewhisperer:taskassist"},
+		"grantTypes": []string{"urn:ietf:params:oauth:grant-type:device_code", "refresh_token"},
+	}
+
+	body, err := json.Marshal(payload)
+	if err != nil {
+		return nil, err
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint+"/client/register", strings.NewReader(string(body)))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("User-Agent", kiroUserAgent)
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		log.Debugf("register client failed (status %d): %s", resp.StatusCode, string(respBody))
+		return nil, fmt.Errorf("register client failed (status %d)", resp.StatusCode)
+	}
+
+	var result RegisterClientResponse
+	if err := json.Unmarshal(respBody, &result); err != nil {
+		return nil, err
+	}
+
+	return &result, nil
+}
+
+// StartDeviceAuthorizationWithIDC starts the device authorization flow for IDC.
+func (c *SSOOIDCClient) StartDeviceAuthorizationWithIDC(ctx context.Context, clientID, clientSecret, startURL, region string) (*StartDeviceAuthResponse, error) {
+	endpoint := getOIDCEndpoint(region)
+
+	payload := map[string]string{
+		"clientId":     clientID,
+		"clientSecret": clientSecret,
+		"startUrl":     startURL,
+	}
+
+	body, err := json.Marshal(payload)
+	if err != nil {
+		return nil, err
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint+"/device_authorization", strings.NewReader(string(body)))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("User-Agent", kiroUserAgent)
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		log.Debugf("start device auth failed (status %d): %s", resp.StatusCode, string(respBody))
+		return nil, fmt.Errorf("start device auth failed (status %d)", resp.StatusCode)
+	}
+
+	var result StartDeviceAuthResponse
+	if err := json.Unmarshal(respBody, &result); err != nil {
+		return nil, err
+	}
+
+	return &result, nil
+}
+
+// CreateTokenWithRegion polls for the access token after user authorization using a specific region.
+func (c *SSOOIDCClient) CreateTokenWithRegion(ctx context.Context, clientID, clientSecret, deviceCode, region string) (*CreateTokenResponse, error) {
+	endpoint := getOIDCEndpoint(region)
+
+	payload := map[string]string{
+		"clientId":     clientID,
+		"clientSecret": clientSecret,
+		"deviceCode":   deviceCode,
+		"grantType":    "urn:ietf:params:oauth:grant-type:device_code",
+	}
+
+	body, err := json.Marshal(payload)
+	if err != nil {
+		return nil, err
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint+"/token", strings.NewReader(string(body)))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("User-Agent", kiroUserAgent)
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	// Check for pending authorization
+	if resp.StatusCode == http.StatusBadRequest {
+		var errResp struct {
+			Error string `json:"error"`
+		}
+		if json.Unmarshal(respBody, &errResp) == nil {
+			if errResp.Error == "authorization_pending" {
+				return nil, ErrAuthorizationPending
+			}
+			if errResp.Error == "slow_down" {
+				return nil, ErrSlowDown
+			}
+		}
+		log.Debugf("create token failed: %s", string(respBody))
+		return nil, fmt.Errorf("create token failed")
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		log.Debugf("create token failed (status %d): %s", resp.StatusCode, string(respBody))
+		return nil, fmt.Errorf("create token failed (status %d)", resp.StatusCode)
+	}
+
+	var result CreateTokenResponse
+	if err := json.Unmarshal(respBody, &result); err != nil {
+		return nil, err
+	}
+
+	return &result, nil
+}
+
+// RefreshTokenWithRegion refreshes an access token using the refresh token with a specific region.
+func (c *SSOOIDCClient) RefreshTokenWithRegion(ctx context.Context, clientID, clientSecret, refreshToken, region, startURL string) (*KiroTokenData, error) {
+	endpoint := getOIDCEndpoint(region)
+
+	payload := map[string]string{
+		"clientId":     clientID,
+		"clientSecret": clientSecret,
+		"refreshToken": refreshToken,
+		"grantType":    "refresh_token",
+	}
+
+	body, err := json.Marshal(payload)
+	if err != nil {
+		return nil, err
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint+"/token", strings.NewReader(string(body)))
+	if err != nil {
+		return nil, err
+	}
+
+	// Set headers matching kiro2api's IDC token refresh
+	// These headers are required for successful IDC token refresh
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Host", fmt.Sprintf("oidc.%s.amazonaws.com", region))
+	req.Header.Set("Connection", "keep-alive")
+	req.Header.Set("x-amz-user-agent", idcAmzUserAgent)
+	req.Header.Set("Accept", "*/*")
+	req.Header.Set("Accept-Language", "*")
+	req.Header.Set("sec-fetch-mode", "cors")
+	req.Header.Set("User-Agent", "node")
+	req.Header.Set("Accept-Encoding", "br, gzip, deflate")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		log.Warnf("IDC token refresh failed (status %d): %s", resp.StatusCode, string(respBody))
+		return nil, fmt.Errorf("token refresh failed (status %d)", resp.StatusCode)
+	}
+
+	var result CreateTokenResponse
+	if err := json.Unmarshal(respBody, &result); err != nil {
+		return nil, err
+	}
+
+	expiresAt := time.Now().Add(time.Duration(result.ExpiresIn) * time.Second)
+
+	return &KiroTokenData{
+		AccessToken:  result.AccessToken,
+		RefreshToken: result.RefreshToken,
+		ExpiresAt:    expiresAt.Format(time.RFC3339),
+		AuthMethod:   "idc",
+		Provider:     "AWS",
+		ClientID:     clientID,
+		ClientSecret: clientSecret,
+		StartURL:     startURL,
+		Region:       region,
+	}, nil
+}
+
+// LoginWithIDC performs the full device code flow for AWS Identity Center (IDC).
+func (c *SSOOIDCClient) LoginWithIDC(ctx context.Context, startURL, region string) (*KiroTokenData, error) {
+	fmt.Println("\n╔══════════════════════════════════════════════════════════╗")
+	fmt.Println("║       Kiro Authentication (AWS Identity Center)          ║")
+	fmt.Println("╚══════════════════════════════════════════════════════════╝")
+
+	// Step 1: Register client with the specified region
+	fmt.Println("\nRegistering client...")
+	regResp, err := c.RegisterClientWithRegion(ctx, region)
+	if err != nil {
+		return nil, fmt.Errorf("failed to register client: %w", err)
+	}
+	log.Debugf("Client registered: %s", regResp.ClientID)
+
+	// Step 2: Start device authorization with IDC start URL
+	fmt.Println("Starting device authorization...")
+	authResp, err := c.StartDeviceAuthorizationWithIDC(ctx, regResp.ClientID, regResp.ClientSecret, startURL, region)
+	if err != nil {
+		return nil, fmt.Errorf("failed to start device auth: %w", err)
+	}
+
+	// Step 3: Show user the verification URL
+	fmt.Printf("\n")
+	fmt.Println("════════════════════════════════════════════════════════════")
+	fmt.Printf("  Confirm the following code in the browser:\n")
+	fmt.Printf("  Code: %s\n", authResp.UserCode)
+	fmt.Println("════════════════════════════════════════════════════════════")
+	fmt.Printf("\n  Open this URL: %s\n\n", authResp.VerificationURIComplete)
+
+	// Set incognito mode based on config
+	if c.cfg != nil {
+		browser.SetIncognitoMode(c.cfg.IncognitoBrowser)
+		if !c.cfg.IncognitoBrowser {
+			log.Info("kiro: using normal browser mode (--no-incognito). Note: You may not be able to select a different account.")
+		} else {
+			log.Debug("kiro: using incognito mode for multi-account support")
+		}
+	} else {
+		browser.SetIncognitoMode(true)
+		log.Debug("kiro: using incognito mode for multi-account support (default)")
+	}
+
+	// Open browser
+	if err := browser.OpenURL(authResp.VerificationURIComplete); err != nil {
+		log.Warnf("Could not open browser automatically: %v", err)
+		fmt.Println("  Please open the URL manually in your browser.")
+	} else {
+		fmt.Println("  (Browser opened automatically)")
+	}
+
+	// Step 4: Poll for token
+	fmt.Println("Waiting for authorization...")
+
+	interval := pollInterval
+	if authResp.Interval > 0 {
+		interval = time.Duration(authResp.Interval) * time.Second
+	}
+
+	deadline := time.Now().Add(time.Duration(authResp.ExpiresIn) * time.Second)
+
+	for time.Now().Before(deadline) {
+		select {
+		case <-ctx.Done():
+			browser.CloseBrowser()
+			return nil, ctx.Err()
+		case <-time.After(interval):
+			tokenResp, err := c.CreateTokenWithRegion(ctx, regResp.ClientID, regResp.ClientSecret, authResp.DeviceCode, region)
+			if err != nil {
+				if errors.Is(err, ErrAuthorizationPending) {
+					fmt.Print(".")
+					continue
+				}
+				if errors.Is(err, ErrSlowDown) {
+					interval += 5 * time.Second
+					continue
+				}
+				browser.CloseBrowser()
+				return nil, fmt.Errorf("token creation failed: %w", err)
+			}
+
+			fmt.Println("\n\n✓ Authorization successful!")
+
+			// Close the browser window
+			if err := browser.CloseBrowser(); err != nil {
+				log.Debugf("Failed to close browser: %v", err)
+			}
+
+			// Step 5: Get profile ARN from CodeWhisperer API
+			fmt.Println("Fetching profile information...")
+			profileArn := c.fetchProfileArn(ctx, tokenResp.AccessToken)
+
+			// Fetch user email
+			email := FetchUserEmailWithFallback(ctx, c.cfg, tokenResp.AccessToken)
+			if email != "" {
+				fmt.Printf("  Logged in as: %s\n", email)
+			}
+
+			expiresAt := time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second)
+
+			return &KiroTokenData{
+				AccessToken:  tokenResp.AccessToken,
+				RefreshToken: tokenResp.RefreshToken,
+				ProfileArn:   profileArn,
+				ExpiresAt:    expiresAt.Format(time.RFC3339),
+				AuthMethod:   "idc",
+				Provider:     "AWS",
+				ClientID:     regResp.ClientID,
+				ClientSecret: regResp.ClientSecret,
+				Email:        email,
+				StartURL:     startURL,
+				Region:       region,
+			}, nil
+		}
+	}
+
+	// Close browser on timeout
+	if err := browser.CloseBrowser(); err != nil {
+		log.Debugf("Failed to close browser on timeout: %v", err)
+	}
+	return nil, fmt.Errorf("authorization timed out")
+}
+
+// LoginWithMethodSelection prompts the user to select between Builder ID and IDC, then performs the login.
+func (c *SSOOIDCClient) LoginWithMethodSelection(ctx context.Context) (*KiroTokenData, error) {
+	fmt.Println("\n╔══════════════════════════════════════════════════════════╗")
+	fmt.Println("║              Kiro Authentication (AWS)                    ║")
+	fmt.Println("╚══════════════════════════════════════════════════════════╝")
+
+	// Prompt for login method
+	options := []string{
+		"Use with Builder ID (personal AWS account)",
+		"Use with IDC Account (organization SSO)",
+	}
+	selection := promptSelect("\n? Select login method:", options)
+
+	if selection == 0 {
+		// Builder ID flow - use existing implementation
+		return c.LoginWithBuilderID(ctx)
+	}
+
+	// IDC flow - prompt for start URL and region
+	fmt.Println()
+	startURL := promptInput("? Enter Start URL", "")
+	if startURL == "" {
+		return nil, fmt.Errorf("start URL is required for IDC login")
+	}
+
+	region := promptInput("? Enter Region", defaultIDCRegion)
+
+	return c.LoginWithIDC(ctx, startURL, region)
+}
+
 // RegisterClient registers a new OIDC client with AWS.
 func (c *SSOOIDCClient) RegisterClient(ctx context.Context) (*RegisterClientResponse, error) {
 	payload := map[string]interface{}{
@@ -211,10 +660,10 @@ func (c *SSOOIDCClient) CreateToken(ctx context.Context, clientID, clientSecret,
 		}
 		if json.Unmarshal(respBody, &errResp) == nil {
 			if errResp.Error == "authorization_pending" {
-				return nil, fmt.Errorf("authorization_pending")
+				return nil, ErrAuthorizationPending
 			}
 			if errResp.Error == "slow_down" {
-				return nil, fmt.Errorf("slow_down")
+				return nil, ErrSlowDown
 			}
 		}
 		log.Debugf("create token failed: %s", string(respBody))
@@ -359,12 +808,11 @@ func (c *SSOOIDCClient) LoginWithBuilderID(ctx context.Context) (*KiroTokenData,
 		case <-time.After(interval):
 			tokenResp, err := c.CreateToken(ctx, regResp.ClientID, regResp.ClientSecret, authResp.DeviceCode)
 			if err != nil {
-				errStr := err.Error()
-				if strings.Contains(errStr, "authorization_pending") {
+				if errors.Is(err, ErrAuthorizationPending) {
 					fmt.Print(".")
 					continue
 				}
-				if strings.Contains(errStr, "slow_down") {
+				if errors.Is(err, ErrSlowDown) {
 					interval += 5 * time.Second
 					continue
 				}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -156,6 +156,11 @@ type AmpModelMapping struct {
 	// To is the target model name to route to (e.g., "claude-sonnet-4").
 	// The target model must have available providers in the registry.
 	To string `yaml:"to" json:"to"`
+
+	// Regex indicates whether the 'from' field should be interpreted as a regular
+	// expression for matching model names. When true, this mapping is evaluated
+	// after exact matches and in the order provided. Defaults to false (exact match).
+	Regex bool `yaml:"regex,omitempty" json:"regex,omitempty"`
 }

 // AmpCode groups Amp CLI integration settings including upstream routing,
@@ -401,7 +406,7 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	cfg.DisableCooling = false
 	cfg.AmpCode.RestrictManagementToLocalhost = false // Default to false: API key auth is sufficient
 	cfg.RemoteManagement.PanelGitHubRepository = DefaultPanelGitHubRepository
-	cfg.IncognitoBrowser = false                     // Default to normal browser (AWS uses incognito by force)
+	cfg.IncognitoBrowser = false // Default to normal browser (AWS uses incognito by force)
 	if err = yaml.Unmarshal(data, &cfg); err != nil {
 		if optional {
 			// In cloud deploy mode, if YAML parsing fails, return empty config instead of error.
--- a/internal/config/sdk_config.go
+++ b/internal/config/sdk_config.go
@@ -22,6 +22,21 @@ type SDKConfig struct {

 	// Access holds request authentication provider configuration.
 	Access AccessConfig `yaml:"auth,omitempty" json:"auth,omitempty"`
+
+	// Streaming configures server-side streaming behavior (keep-alives and safe bootstrap retries).
+	Streaming StreamingConfig `yaml:"streaming" json:"streaming"`
+}
+
+// StreamingConfig holds server streaming behavior configuration.
+type StreamingConfig struct {
+	// KeepAliveSeconds controls how often the server emits SSE heartbeats (": keep-alive\n\n").
+	// nil means default (15 seconds). <= 0 disables keep-alives.
+	KeepAliveSeconds *int `yaml:"keepalive-seconds,omitempty" json:"keepalive-seconds,omitempty"`
+
+	// BootstrapRetries controls how many times the server may retry a streaming request before any bytes are sent,
+	// to allow auth rotation / transient recovery.
+	// nil means default (2). 0 disables bootstrap retries.
+	BootstrapRetries *int `yaml:"bootstrap-retries,omitempty" json:"bootstrap-retries,omitempty"`
 }

 // AccessConfig groups request authentication providers.
--- a/internal/logging/gin_logger.go
+++ b/internal/logging/gin_logger.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"net/http"
 	"runtime/debug"
+	"strings"
 	"time"

 	"github.com/gin-gonic/gin"
@@ -14,11 +15,24 @@ import (
 	log "github.com/sirupsen/logrus"
 )

+// aiAPIPrefixes defines path prefixes for AI API requests that should have request ID tracking.
+var aiAPIPrefixes = []string{
+	"/v1/chat/completions",
+	"/v1/completions",
+	"/v1/messages",
+	"/v1/responses",
+	"/v1beta/models/",
+	"/api/provider/",
+}
+
 const skipGinLogKey = "__gin_skip_request_logging__"

 // GinLogrusLogger returns a Gin middleware handler that logs HTTP requests and responses
 // using logrus. It captures request details including method, path, status code, latency,
-// client IP, and any error messages, formatting them in a Gin-style log format.
+// client IP, and any error messages. Request ID is only added for AI API requests.
+//
+// Output format (AI API): [2025-12-23 20:14:10] [info ] | a1b2c3d4 | 200 |       23.559s | ...
+// Output format (others): [2025-12-23 20:14:10] [info ] | -------- | 200 |       23.559s | ...
 //
 // Returns:
 //   - gin.HandlerFunc: A middleware handler for request logging
@@ -28,6 +42,15 @@ func GinLogrusLogger() gin.HandlerFunc {
 		path := c.Request.URL.Path
 		raw := util.MaskSensitiveQuery(c.Request.URL.RawQuery)

+		// Only generate request ID for AI API paths
+		var requestID string
+		if isAIAPIPath(path) {
+			requestID = GenerateRequestID()
+			SetGinRequestID(c, requestID)
+			ctx := WithRequestID(c.Request.Context(), requestID)
+			c.Request = c.Request.WithContext(ctx)
+		}
+
 		c.Next()

 		if shouldSkipGinRequestLogging(c) {
@@ -49,23 +72,38 @@ func GinLogrusLogger() gin.HandlerFunc {
 		clientIP := c.ClientIP()
 		method := c.Request.Method
 		errorMessage := c.Errors.ByType(gin.ErrorTypePrivate).String()
-		timestamp := time.Now().Format("2006/01/02 - 15:04:05")
-		logLine := fmt.Sprintf("[GIN] %s | %3d | %13v | %15s | %-7s \"%s\"", timestamp, statusCode, latency, clientIP, method, path)
+
+		if requestID == "" {
+			requestID = "--------"
+		}
+		logLine := fmt.Sprintf("%3d | %13v | %15s | %-7s \"%s\"", statusCode, latency, clientIP, method, path)
 		if errorMessage != "" {
 			logLine = logLine + " | " + errorMessage
 		}

+		entry := log.WithField("request_id", requestID)
+
 		switch {
 		case statusCode >= http.StatusInternalServerError:
-			log.Error(logLine)
+			entry.Error(logLine)
 		case statusCode >= http.StatusBadRequest:
-			log.Warn(logLine)
+			entry.Warn(logLine)
 		default:
-			log.Info(logLine)
+			entry.Info(logLine)
 		}
 	}
 }

+// isAIAPIPath checks if the given path is an AI API endpoint that should have request ID tracking.
+func isAIAPIPath(path string) bool {
+	for _, prefix := range aiAPIPrefixes {
+		if strings.HasPrefix(path, prefix) {
+			return true
+		}
+	}
+	return false
+}
+
 // GinLogrusRecovery returns a Gin middleware handler that recovers from panics and logs
 // them using logrus. When a panic occurs, it captures the panic value, stack trace,
 // and request path, then returns a 500 Internal Server Error response to the client.
--- a/internal/logging/global_logger.go
+++ b/internal/logging/global_logger.go
@@ -24,7 +24,8 @@ var (
 )

 // LogFormatter defines a custom log format for logrus.
-// This formatter adds timestamp, level, and source location to each log entry.
+// This formatter adds timestamp, level, request ID, and source location to each log entry.
+// Format: [2025-12-23 20:14:04] [debug] [manager.go:524] | a1b2c3d4 | Use API key sk-9...0RHO for model gpt-5.2
 type LogFormatter struct{}

 // Format renders a single log entry with custom formatting.
@@ -38,16 +39,24 @@ func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {

 	timestamp := entry.Time.Format("2006-01-02 15:04:05")
 	message := strings.TrimRight(entry.Message, "\r\n")
-	
-	// Handle nil Caller (can happen with some log entries)
-	callerFile := "unknown"
-	callerLine := 0
-	if entry.Caller != nil {
-		callerFile = filepath.Base(entry.Caller.File)
-		callerLine = entry.Caller.Line
+
+	reqID := "--------"
+	if id, ok := entry.Data["request_id"].(string); ok && id != "" {
+		reqID = id
+	}
+
+	level := entry.Level.String()
+	if level == "warning" {
+		level = "warn"
+	}
+	levelStr := fmt.Sprintf("%-5s", level)
+
+	var formatted string
+	if entry.Caller != nil {
+		formatted = fmt.Sprintf("[%s] [%s] [%s] [%s:%d] %s\n", timestamp, reqID, levelStr, filepath.Base(entry.Caller.File), entry.Caller.Line, message)
+	} else {
+		formatted = fmt.Sprintf("[%s] [%s] [%s] %s\n", timestamp, reqID, levelStr, message)
 	}
-	
-	formatted := fmt.Sprintf("[%s] [%s] [%s:%d] %s\n", timestamp, entry.Level, callerFile, callerLine, message)
 	buffer.WriteString(formatted)

 	return buffer.Bytes(), nil
--- a/internal/logging/request_logger.go
+++ b/internal/logging/request_logger.go
@@ -43,10 +43,11 @@ type RequestLogger interface {
 	//   - response: The raw response data
 	//   - apiRequest: The API request data
 	//   - apiResponse: The API response data
+	//   - requestID: Optional request ID for log file naming
 	//
 	// Returns:
 	//   - error: An error if logging fails, nil otherwise
-	LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage) error
+	LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error

 	// LogStreamingRequest initiates logging for a streaming request and returns a writer for chunks.
 	//
@@ -55,11 +56,12 @@ type RequestLogger interface {
 	//   - method: The HTTP method
 	//   - headers: The request headers
 	//   - body: The request body
+	//   - requestID: Optional request ID for log file naming
 	//
 	// Returns:
 	//   - StreamingLogWriter: A writer for streaming response chunks
 	//   - error: An error if logging initialization fails, nil otherwise
-	LogStreamingRequest(url, method string, headers map[string][]string, body []byte) (StreamingLogWriter, error)
+	LogStreamingRequest(url, method string, headers map[string][]string, body []byte, requestID string) (StreamingLogWriter, error)

 	// IsEnabled returns whether request logging is currently enabled.
 	//
@@ -177,20 +179,21 @@ func (l *FileRequestLogger) SetEnabled(enabled bool) {
 //   - response: The raw response data
 //   - apiRequest: The API request data
 //   - apiResponse: The API response data
+//   - requestID: Optional request ID for log file naming
 //
 // Returns:
 //   - error: An error if logging fails, nil otherwise
-func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage) error {
-	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false)
+func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string) error {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID)
 }

 // LogRequestWithOptions logs a request with optional forced logging behavior.
 // The force flag allows writing error logs even when regular request logging is disabled.
-func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool) error {
-	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force)
+func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID)
 }

-func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool) error {
+func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string) error {
 	if !l.enabled && !force {
 		return nil
 	}
@@ -200,10 +203,10 @@ func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[st
 		return fmt.Errorf("failed to create logs directory: %w", errEnsure)
 	}

-	// Generate filename
-	filename := l.generateFilename(url)
+	// Generate filename with request ID
+	filename := l.generateFilename(url, requestID)
 	if force && !l.enabled {
-		filename = l.generateErrorFilename(url)
+		filename = l.generateErrorFilename(url, requestID)
 	}
 	filePath := filepath.Join(l.logsDir, filename)

@@ -271,11 +274,12 @@ func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[st
 //   - method: The HTTP method
 //   - headers: The request headers
 //   - body: The request body
+//   - requestID: Optional request ID for log file naming
 //
 // Returns:
 //   - StreamingLogWriter: A writer for streaming response chunks
 //   - error: An error if logging initialization fails, nil otherwise
-func (l *FileRequestLogger) LogStreamingRequest(url, method string, headers map[string][]string, body []byte) (StreamingLogWriter, error) {
+func (l *FileRequestLogger) LogStreamingRequest(url, method string, headers map[string][]string, body []byte, requestID string) (StreamingLogWriter, error) {
 	if !l.enabled {
 		return &NoOpStreamingLogWriter{}, nil
 	}
@@ -285,8 +289,8 @@ func (l *FileRequestLogger) LogStreamingRequest(url, method string, headers map[
 		return nil, fmt.Errorf("failed to create logs directory: %w", err)
 	}

-	// Generate filename
-	filename := l.generateFilename(url)
+	// Generate filename with request ID
+	filename := l.generateFilename(url, requestID)
 	filePath := filepath.Join(l.logsDir, filename)

 	requestHeaders := make(map[string][]string, len(headers))
@@ -330,8 +334,8 @@ func (l *FileRequestLogger) LogStreamingRequest(url, method string, headers map[
 }

 // generateErrorFilename creates a filename with an error prefix to differentiate forced error logs.
-func (l *FileRequestLogger) generateErrorFilename(url string) string {
-	return fmt.Sprintf("error-%s", l.generateFilename(url))
+func (l *FileRequestLogger) generateErrorFilename(url string, requestID ...string) string {
+	return fmt.Sprintf("error-%s", l.generateFilename(url, requestID...))
 }

 // ensureLogsDir creates the logs directory if it doesn't exist.
@@ -346,13 +350,15 @@ func (l *FileRequestLogger) ensureLogsDir() error {
 }

 // generateFilename creates a sanitized filename from the URL path and current timestamp.
+// Format: v1-responses-2025-12-23T195811-a1b2c3d4.log
 //
 // Parameters:
 //   - url: The request URL
+//   - requestID: Optional request ID to include in filename
 //
 // Returns:
 //   - string: A sanitized filename for the log file
-func (l *FileRequestLogger) generateFilename(url string) string {
+func (l *FileRequestLogger) generateFilename(url string, requestID ...string) string {
 	// Extract path from URL
 	path := url
 	if strings.Contains(url, "?") {
@@ -368,12 +374,18 @@ func (l *FileRequestLogger) generateFilename(url string) string {
 	sanitized := l.sanitizeForFilename(path)

 	// Add timestamp
-	timestamp := time.Now().Format("2006-01-02T150405-.000000000")
-	timestamp = strings.Replace(timestamp, ".", "", -1)
+	timestamp := time.Now().Format("2006-01-02T150405")

-	id := requestLogID.Add(1)
+	// Use request ID if provided, otherwise use sequential ID
+	var idPart string
+	if len(requestID) > 0 && requestID[0] != "" {
+		idPart = requestID[0]
+	} else {
+		id := requestLogID.Add(1)
+		idPart = fmt.Sprintf("%d", id)
+	}

-	return fmt.Sprintf("%s-%s-%d.log", sanitized, timestamp, id)
+	return fmt.Sprintf("%s-%s-%s.log", sanitized, timestamp, idPart)
 }

 // sanitizeForFilename replaces characters that are not safe for filenames.
--- a/internal/logging/requestid.go
+++ b/internal/logging/requestid.go
@@ -0,0 +1,61 @@
+package logging
+
+import (
+	"context"
+	"crypto/rand"
+	"encoding/hex"
+
+	"github.com/gin-gonic/gin"
+)
+
+// requestIDKey is the context key for storing/retrieving request IDs.
+type requestIDKey struct{}
+
+// ginRequestIDKey is the Gin context key for request IDs.
+const ginRequestIDKey = "__request_id__"
+
+// GenerateRequestID creates a new 8-character hex request ID.
+func GenerateRequestID() string {
+	b := make([]byte, 4)
+	if _, err := rand.Read(b); err != nil {
+		return "00000000"
+	}
+	return hex.EncodeToString(b)
+}
+
+// WithRequestID returns a new context with the request ID attached.
+func WithRequestID(ctx context.Context, requestID string) context.Context {
+	return context.WithValue(ctx, requestIDKey{}, requestID)
+}
+
+// GetRequestID retrieves the request ID from the context.
+// Returns empty string if not found.
+func GetRequestID(ctx context.Context) string {
+	if ctx == nil {
+		return ""
+	}
+	if id, ok := ctx.Value(requestIDKey{}).(string); ok {
+		return id
+	}
+	return ""
+}
+
+// SetGinRequestID stores the request ID in the Gin context.
+func SetGinRequestID(c *gin.Context, requestID string) {
+	if c != nil {
+		c.Set(ginRequestIDKey, requestID)
+	}
+}
+
+// GetGinRequestID retrieves the request ID from the Gin context.
+func GetGinRequestID(c *gin.Context) string {
+	if c == nil {
+		return ""
+	}
+	if id, exists := c.Get(ginRequestIDKey); exists {
+		if s, ok := id.(string); ok {
+			return s
+		}
+	}
+	return ""
+}
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -727,6 +727,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400},
 		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
 		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
+		{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
 		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
 		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
 		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
@@ -740,6 +741,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
 		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
 		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
+		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
 	for _, entry := range entries {
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -42,7 +42,7 @@ const (
 	antigravityModelsPath          = "/v1internal:fetchAvailableModels"
 	antigravityClientID            = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
 	antigravityClientSecret        = "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"
-	defaultAntigravityAgent        = "antigravity/1.11.5 windows/amd64"
+	defaultAntigravityAgent        = "antigravity/1.104.0 darwin/arm64"
 	antigravityAuthType            = "antigravity"
 	refreshSkew                    = 3000 * time.Second
 )
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -10,6 +10,8 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"os"
+	"path/filepath"
 	"strings"
 	"sync"
 	"time"
@@ -43,10 +45,15 @@ const (
 	// Event Stream error type constants
 	ErrStreamFatal     = "fatal"     // Connection/authentication errors, not recoverable
 	ErrStreamMalformed = "malformed" // Format errors, data cannot be parsed
-	// kiroUserAgent matches amq2api format for User-Agent header
+	// kiroUserAgent matches amq2api format for User-Agent header (Amazon Q CLI style)
 	kiroUserAgent = "aws-sdk-rust/1.3.9 os/macos lang/rust/1.87.0"
-	// kiroFullUserAgent is the complete x-amz-user-agent header matching amq2api
+	// kiroFullUserAgent is the complete x-amz-user-agent header matching amq2api (Amazon Q CLI style)
 	kiroFullUserAgent = "aws-sdk-rust/1.3.9 ua/2.1 api/ssooidc/1.88.0 os/macos lang/rust/1.87.0 m/E app/AmazonQ-For-CLI"
+
+	// Kiro IDE style headers (from kiro2api - for IDC auth)
+	kiroIDEUserAgent     = "aws-sdk-js/1.0.18 ua/2.1 os/darwin#25.0.0 lang/js md/nodejs#20.16.0 api/codewhispererstreaming#1.0.18 m/E KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
+	kiroIDEAmzUserAgent  = "aws-sdk-js/1.0.18 KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
+	kiroIDEAgentModeSpec = "spec"
 )

 // Real-time usage estimation configuration
@@ -101,11 +108,24 @@ var kiroEndpointConfigs = []kiroEndpointConfig{

 // getKiroEndpointConfigs returns the list of Kiro API endpoint configurations to try in order.
 // Supports reordering based on "preferred_endpoint" in auth metadata/attributes.
+// For IDC auth method, automatically uses CodeWhisperer endpoint with CLI origin.
 func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig {
 	if auth == nil {
 		return kiroEndpointConfigs
 	}

+	// For IDC auth, use CodeWhisperer endpoint with AI_EDITOR origin (same as Social auth)
+	// Based on kiro2api analysis: IDC tokens work with CodeWhisperer endpoint using Bearer auth
+	// The difference is only in how tokens are refreshed (OIDC with clientId/clientSecret for IDC)
+	// NOT in how API calls are made - both Social and IDC use the same endpoint/origin
+	if auth.Metadata != nil {
+		authMethod, _ := auth.Metadata["auth_method"].(string)
+		if authMethod == "idc" {
+			log.Debugf("kiro: IDC auth, using CodeWhisperer endpoint")
+			return kiroEndpointConfigs
+		}
+	}
+
 	// Check for preference
 	var preference string
 	if auth.Metadata != nil {
@@ -162,6 +182,15 @@ type KiroExecutor struct {
 	refreshMu sync.Mutex // Serializes token refresh operations to prevent race conditions
 }

+// isIDCAuth checks if the auth uses IDC (Identity Center) authentication method.
+func isIDCAuth(auth *cliproxyauth.Auth) bool {
+	if auth == nil || auth.Metadata == nil {
+		return false
+	}
+	authMethod, _ := auth.Metadata["auth_method"].(string)
+	return authMethod == "idc"
+}
+
 // buildKiroPayloadForFormat builds the Kiro API payload based on the source format.
 // This is critical because OpenAI and Claude formats have different tool structures:
 // - OpenAI: tools[].function.name, tools[].function.description
@@ -210,6 +239,10 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 			log.Warnf("kiro: pre-request token refresh failed: %v", refreshErr)
 		} else if refreshedAuth != nil {
 			auth = refreshedAuth
+			// Persist the refreshed auth to file so subsequent requests use it
+			if persistErr := e.persistRefreshedAuth(auth); persistErr != nil {
+				log.Warnf("kiro: failed to persist refreshed auth: %v", persistErr)
+			}
 			accessToken, profileArn = kiroCredentials(auth)
 			log.Infof("kiro: token refreshed successfully before request")
 		}
@@ -262,15 +295,28 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
 			}

 			httpReq.Header.Set("Content-Type", kiroContentType)
-			httpReq.Header.Set("Authorization", "Bearer "+accessToken)
 			httpReq.Header.Set("Accept", kiroAcceptStream)
 			// Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors)
 			httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
-			httpReq.Header.Set("User-Agent", kiroUserAgent)
-			httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
+
+			// Use different headers based on auth type
+			// IDC auth uses Kiro IDE style headers (from kiro2api)
+			// Other auth types use Amazon Q CLI style headers
+			if isIDCAuth(auth) {
+				httpReq.Header.Set("User-Agent", kiroIDEUserAgent)
+				httpReq.Header.Set("X-Amz-User-Agent", kiroIDEAmzUserAgent)
+				httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
+				log.Debugf("kiro: using Kiro IDE headers for IDC auth")
+			} else {
+				httpReq.Header.Set("User-Agent", kiroUserAgent)
+				httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
+			}
 			httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
 			httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())

+			// Bearer token authentication for all auth types (Builder ID, IDC, social, etc.)
+			httpReq.Header.Set("Authorization", "Bearer "+accessToken)
+
 			var attrs map[string]string
 			if auth != nil {
 				attrs = auth.Attributes
@@ -358,6 +404,11 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.

 					if refreshedAuth != nil {
 						auth = refreshedAuth
+						// Persist the refreshed auth to file so subsequent requests use it
+						if persistErr := e.persistRefreshedAuth(auth); persistErr != nil {
+							log.Warnf("kiro: failed to persist refreshed auth: %v", persistErr)
+							// Continue anyway - the token is valid for this request
+						}
 						accessToken, profileArn = kiroCredentials(auth)
 						// Rebuild payload with new profile ARN if changed
 						kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
@@ -416,6 +467,11 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
 					}
 					if refreshedAuth != nil {
 						auth = refreshedAuth
+						// Persist the refreshed auth to file so subsequent requests use it
+						if persistErr := e.persistRefreshedAuth(auth); persistErr != nil {
+							log.Warnf("kiro: failed to persist refreshed auth: %v", persistErr)
+							// Continue anyway - the token is valid for this request
+						}
 						accessToken, profileArn = kiroCredentials(auth)
 						kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
 						log.Infof("kiro: token refreshed for 403, retrying request")
@@ -518,6 +574,10 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 			log.Warnf("kiro: pre-request token refresh failed: %v", refreshErr)
 		} else if refreshedAuth != nil {
 			auth = refreshedAuth
+			// Persist the refreshed auth to file so subsequent requests use it
+			if persistErr := e.persistRefreshedAuth(auth); persistErr != nil {
+				log.Warnf("kiro: failed to persist refreshed auth: %v", persistErr)
+			}
 			accessToken, profileArn = kiroCredentials(auth)
 			log.Infof("kiro: token refreshed successfully before stream request")
 		}
@@ -568,15 +628,28 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
 			}

 			httpReq.Header.Set("Content-Type", kiroContentType)
-			httpReq.Header.Set("Authorization", "Bearer "+accessToken)
 			httpReq.Header.Set("Accept", kiroAcceptStream)
 			// Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors)
 			httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget)
-			httpReq.Header.Set("User-Agent", kiroUserAgent)
-			httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
+
+			// Use different headers based on auth type
+			// IDC auth uses Kiro IDE style headers (from kiro2api)
+			// Other auth types use Amazon Q CLI style headers
+			if isIDCAuth(auth) {
+				httpReq.Header.Set("User-Agent", kiroIDEUserAgent)
+				httpReq.Header.Set("X-Amz-User-Agent", kiroIDEAmzUserAgent)
+				httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
+				log.Debugf("kiro: using Kiro IDE headers for IDC auth")
+			} else {
+				httpReq.Header.Set("User-Agent", kiroUserAgent)
+				httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
+			}
 			httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
 			httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())

+			// Bearer token authentication for all auth types (Builder ID, IDC, social, etc.)
+			httpReq.Header.Set("Authorization", "Bearer "+accessToken)
+
 			var attrs map[string]string
 			if auth != nil {
 				attrs = auth.Attributes
@@ -677,6 +750,11 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox

 					if refreshedAuth != nil {
 						auth = refreshedAuth
+						// Persist the refreshed auth to file so subsequent requests use it
+						if persistErr := e.persistRefreshedAuth(auth); persistErr != nil {
+							log.Warnf("kiro: failed to persist refreshed auth: %v", persistErr)
+							// Continue anyway - the token is valid for this request
+						}
 						accessToken, profileArn = kiroCredentials(auth)
 						// Rebuild payload with new profile ARN if changed
 						kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
@@ -735,6 +813,11 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
 					}
 					if refreshedAuth != nil {
 						auth = refreshedAuth
+						// Persist the refreshed auth to file so subsequent requests use it
+						if persistErr := e.persistRefreshedAuth(auth); persistErr != nil {
+							log.Warnf("kiro: failed to persist refreshed auth: %v", persistErr)
+							// Continue anyway - the token is valid for this request
+						}
 						accessToken, profileArn = kiroCredentials(auth)
 						kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from, opts.Headers)
 						log.Infof("kiro: token refreshed for 403, retrying stream request")
@@ -1001,12 +1084,12 @@ func getEffectiveProfileArn(auth *cliproxyauth.Auth, profileArn string) string {
 // This consolidates the auth_method check that was previously done separately.
 func getEffectiveProfileArnWithWarning(auth *cliproxyauth.Auth, profileArn string) string {
 	if auth != nil && auth.Metadata != nil {
-		if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" {
-			// builder-id auth doesn't need profileArn
+		if authMethod, ok := auth.Metadata["auth_method"].(string); ok && (authMethod == "builder-id" || authMethod == "idc") {
+			// builder-id and idc auth don't need profileArn
 			return ""
 		}
 	}
-	// For non-builder-id auth (social auth), profileArn is required
+	// For non-builder-id/idc auth (social auth), profileArn is required
 	if profileArn == "" {
 		log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
 	}
@@ -3010,6 +3093,7 @@ func (e *KiroExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c
 	var refreshToken string
 	var clientID, clientSecret string
 	var authMethod string
+	var region, startURL string

 	if auth.Metadata != nil {
 		if rt, ok := auth.Metadata["refresh_token"].(string); ok {
@@ -3024,6 +3108,12 @@ func (e *KiroExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c
 		if am, ok := auth.Metadata["auth_method"].(string); ok {
 			authMethod = am
 		}
+		if r, ok := auth.Metadata["region"].(string); ok {
+			region = r
+		}
+		if su, ok := auth.Metadata["start_url"].(string); ok {
+			startURL = su
+		}
 	}

 	if refreshToken == "" {
@@ -3033,12 +3123,20 @@ func (e *KiroExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c
 	var tokenData *kiroauth.KiroTokenData
 	var err error

-	// Use SSO OIDC refresh for AWS Builder ID, otherwise use Kiro's OAuth refresh endpoint
-	if clientID != "" && clientSecret != "" && authMethod == "builder-id" {
+	ssoClient := kiroauth.NewSSOOIDCClient(e.cfg)
+
+	// Use SSO OIDC refresh for AWS Builder ID or IDC, otherwise use Kiro's OAuth refresh endpoint
+	switch {
+	case clientID != "" && clientSecret != "" && authMethod == "idc" && region != "":
+		// IDC refresh with region-specific endpoint
+		log.Debugf("kiro executor: using SSO OIDC refresh for IDC (region=%s)", region)
+		tokenData, err = ssoClient.RefreshTokenWithRegion(ctx, clientID, clientSecret, refreshToken, region, startURL)
+	case clientID != "" && clientSecret != "" && authMethod == "builder-id":
+		// Builder ID refresh with default endpoint
 		log.Debugf("kiro executor: using SSO OIDC refresh for AWS Builder ID")
-		ssoClient := kiroauth.NewSSOOIDCClient(e.cfg)
 		tokenData, err = ssoClient.RefreshToken(ctx, clientID, clientSecret, refreshToken)
-	} else {
+	default:
+		// Fallback to Kiro's OAuth refresh endpoint (for social auth: Google/GitHub)
 		log.Debugf("kiro executor: using Kiro OAuth refresh endpoint")
 		oauth := kiroauth.NewKiroOAuth(e.cfg)
 		tokenData, err = oauth.RefreshToken(ctx, refreshToken)
@@ -3094,6 +3192,53 @@ func (e *KiroExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c
 	return updated, nil
 }

+// persistRefreshedAuth persists a refreshed auth record to disk.
+// This ensures token refreshes from inline retry are saved to the auth file.
+func (e *KiroExecutor) persistRefreshedAuth(auth *cliproxyauth.Auth) error {
+	if auth == nil || auth.Metadata == nil {
+		return fmt.Errorf("kiro executor: cannot persist nil auth or metadata")
+	}
+
+	// Determine the file path from auth attributes or filename
+	var authPath string
+	if auth.Attributes != nil {
+		if p := strings.TrimSpace(auth.Attributes["path"]); p != "" {
+			authPath = p
+		}
+	}
+	if authPath == "" {
+		fileName := strings.TrimSpace(auth.FileName)
+		if fileName == "" {
+			return fmt.Errorf("kiro executor: auth has no file path or filename")
+		}
+		if filepath.IsAbs(fileName) {
+			authPath = fileName
+		} else if e.cfg != nil && e.cfg.AuthDir != "" {
+			authPath = filepath.Join(e.cfg.AuthDir, fileName)
+		} else {
+			return fmt.Errorf("kiro executor: cannot determine auth file path")
+		}
+	}
+
+	// Marshal metadata to JSON
+	raw, err := json.Marshal(auth.Metadata)
+	if err != nil {
+		return fmt.Errorf("kiro executor: marshal metadata failed: %w", err)
+	}
+
+	// Write to temp file first, then rename (atomic write)
+	tmp := authPath + ".tmp"
+	if err := os.WriteFile(tmp, raw, 0o600); err != nil {
+		return fmt.Errorf("kiro executor: write temp auth file failed: %w", err)
+	}
+	if err := os.Rename(tmp, authPath); err != nil {
+		return fmt.Errorf("kiro executor: rename auth file failed: %w", err)
+	}
+
+	log.Debugf("kiro executor: persisted refreshed auth to %s", authPath)
+	return nil
+}
+
 // isTokenExpired checks if a JWT access token has expired.
 // Returns true if the token is expired or cannot be parsed.
 func (e *KiroExecutor) isTokenExpired(accessToken string) bool {
--- a/internal/runtime/executor/usage_helpers.go
+++ b/internal/runtime/executor/usage_helpers.go
@@ -275,6 +275,20 @@ func parseClaudeStreamUsage(line []byte) (usage.Detail, bool) {
 	return detail, true
 }

+func parseGeminiFamilyUsageDetail(node gjson.Result) usage.Detail {
+	detail := usage.Detail{
+		InputTokens:     node.Get("promptTokenCount").Int(),
+		OutputTokens:    node.Get("candidatesTokenCount").Int(),
+		ReasoningTokens: node.Get("thoughtsTokenCount").Int(),
+		TotalTokens:     node.Get("totalTokenCount").Int(),
+		CachedTokens:    node.Get("cachedContentTokenCount").Int(),
+	}
+	if detail.TotalTokens == 0 {
+		detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens
+	}
+	return detail
+}
+
 func parseGeminiCLIUsage(data []byte) usage.Detail {
 	usageNode := gjson.ParseBytes(data)
 	node := usageNode.Get("response.usageMetadata")
@@ -284,16 +298,7 @@ func parseGeminiCLIUsage(data []byte) usage.Detail {
 	if !node.Exists() {
 		return usage.Detail{}
 	}
-	detail := usage.Detail{
-		InputTokens:     node.Get("promptTokenCount").Int(),
-		OutputTokens:    node.Get("candidatesTokenCount").Int(),
-		ReasoningTokens: node.Get("thoughtsTokenCount").Int(),
-		TotalTokens:     node.Get("totalTokenCount").Int(),
-	}
-	if detail.TotalTokens == 0 {
-		detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens
-	}
-	return detail
+	return parseGeminiFamilyUsageDetail(node)
 }

 func parseGeminiUsage(data []byte) usage.Detail {
@@ -305,16 +310,7 @@ func parseGeminiUsage(data []byte) usage.Detail {
 	if !node.Exists() {
 		return usage.Detail{}
 	}
-	detail := usage.Detail{
-		InputTokens:     node.Get("promptTokenCount").Int(),
-		OutputTokens:    node.Get("candidatesTokenCount").Int(),
-		ReasoningTokens: node.Get("thoughtsTokenCount").Int(),
-		TotalTokens:     node.Get("totalTokenCount").Int(),
-	}
-	if detail.TotalTokens == 0 {
-		detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens
-	}
-	return detail
+	return parseGeminiFamilyUsageDetail(node)
 }

 func parseGeminiStreamUsage(line []byte) (usage.Detail, bool) {
@@ -329,16 +325,7 @@ func parseGeminiStreamUsage(line []byte) (usage.Detail, bool) {
 	if !node.Exists() {
 		return usage.Detail{}, false
 	}
-	detail := usage.Detail{
-		InputTokens:     node.Get("promptTokenCount").Int(),
-		OutputTokens:    node.Get("candidatesTokenCount").Int(),
-		ReasoningTokens: node.Get("thoughtsTokenCount").Int(),
-		TotalTokens:     node.Get("totalTokenCount").Int(),
-	}
-	if detail.TotalTokens == 0 {
-		detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens
-	}
-	return detail, true
+	return parseGeminiFamilyUsageDetail(node), true
 }

 func parseGeminiCLIStreamUsage(line []byte) (usage.Detail, bool) {
@@ -353,16 +340,7 @@ func parseGeminiCLIStreamUsage(line []byte) (usage.Detail, bool) {
 	if !node.Exists() {
 		return usage.Detail{}, false
 	}
-	detail := usage.Detail{
-		InputTokens:     node.Get("promptTokenCount").Int(),
-		OutputTokens:    node.Get("candidatesTokenCount").Int(),
-		ReasoningTokens: node.Get("thoughtsTokenCount").Int(),
-		TotalTokens:     node.Get("totalTokenCount").Int(),
-	}
-	if detail.TotalTokens == 0 {
-		detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens
-	}
-	return detail, true
+	return parseGeminiFamilyUsageDetail(node), true
 }

 func parseAntigravityUsage(data []byte) usage.Detail {
@@ -377,16 +355,7 @@ func parseAntigravityUsage(data []byte) usage.Detail {
 	if !node.Exists() {
 		return usage.Detail{}
 	}
-	detail := usage.Detail{
-		InputTokens:     node.Get("promptTokenCount").Int(),
-		OutputTokens:    node.Get("candidatesTokenCount").Int(),
-		ReasoningTokens: node.Get("thoughtsTokenCount").Int(),
-		TotalTokens:     node.Get("totalTokenCount").Int(),
-	}
-	if detail.TotalTokens == 0 {
-		detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens
-	}
-	return detail
+	return parseGeminiFamilyUsageDetail(node)
 }

 func parseAntigravityStreamUsage(line []byte) (usage.Detail, bool) {
@@ -404,16 +373,7 @@ func parseAntigravityStreamUsage(line []byte) (usage.Detail, bool) {
 	if !node.Exists() {
 		return usage.Detail{}, false
 	}
-	detail := usage.Detail{
-		InputTokens:     node.Get("promptTokenCount").Int(),
-		OutputTokens:    node.Get("candidatesTokenCount").Int(),
-		ReasoningTokens: node.Get("thoughtsTokenCount").Int(),
-		TotalTokens:     node.Get("totalTokenCount").Int(),
-	}
-	if detail.TotalTokens == 0 {
-		detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens
-	}
-	return detail, true
+	return parseGeminiFamilyUsageDetail(node), true
 }

 var stopChunkWithoutUsage sync.Map
--- a/internal/translator/antigravity/claude/antigravity_claude_response.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response.go
@@ -35,6 +35,7 @@ type Params struct {
 	CandidatesTokenCount int64  // Cached candidate token count from usage metadata
 	ThoughtsTokenCount   int64  // Cached thinking token count from usage metadata
 	TotalTokenCount      int64  // Cached total token count from usage metadata
+	CachedTokenCount     int64  // Cached content token count (indicates prompt caching)
 	HasSentFinalEvents   bool   // Indicates if final content/message events have been sent
 	HasToolUse           bool   // Indicates if tool use was observed in the stream
 	HasContent           bool   // Tracks whether any content (text, thinking, or tool use) has been output
@@ -274,6 +275,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 		params.CandidatesTokenCount = usageResult.Get("candidatesTokenCount").Int()
 		params.ThoughtsTokenCount = usageResult.Get("thoughtsTokenCount").Int()
 		params.TotalTokenCount = usageResult.Get("totalTokenCount").Int()
+		params.CachedTokenCount = usageResult.Get("cachedContentTokenCount").Int()
 		if params.CandidatesTokenCount == 0 && params.TotalTokenCount > 0 {
 			params.CandidatesTokenCount = params.TotalTokenCount - params.PromptTokenCount - params.ThoughtsTokenCount
 			if params.CandidatesTokenCount < 0 {
@@ -322,6 +324,14 @@ func appendFinalEvents(params *Params, output *string, force bool) {
 	*output = *output + "event: message_delta\n"
 	*output = *output + "data: "
 	delta := fmt.Sprintf(`{"type":"message_delta","delta":{"stop_reason":"%s","stop_sequence":null},"usage":{"input_tokens":%d,"output_tokens":%d}}`, stopReason, params.PromptTokenCount, usageOutputTokens)
+	// Add cache_read_input_tokens if cached tokens are present (indicates prompt caching is working)
+	if params.CachedTokenCount > 0 {
+		var err error
+		delta, err = sjson.Set(delta, "usage.cache_read_input_tokens", params.CachedTokenCount)
+		if err != nil {
+			log.Warnf("antigravity claude response: failed to set cache_read_input_tokens: %v", err)
+		}
+	}
 	*output = *output + delta + "\n\n\n"

 	params.HasSentFinalEvents = true
@@ -361,6 +371,7 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 	candidateTokens := root.Get("response.usageMetadata.candidatesTokenCount").Int()
 	thoughtTokens := root.Get("response.usageMetadata.thoughtsTokenCount").Int()
 	totalTokens := root.Get("response.usageMetadata.totalTokenCount").Int()
+	cachedTokens := root.Get("response.usageMetadata.cachedContentTokenCount").Int()
 	outputTokens := candidateTokens + thoughtTokens
 	if outputTokens == 0 && totalTokens > 0 {
 		outputTokens = totalTokens - promptTokens
@@ -374,6 +385,14 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 	responseJSON, _ = sjson.Set(responseJSON, "model", root.Get("response.modelVersion").String())
 	responseJSON, _ = sjson.Set(responseJSON, "usage.input_tokens", promptTokens)
 	responseJSON, _ = sjson.Set(responseJSON, "usage.output_tokens", outputTokens)
+	// Add cache_read_input_tokens if cached tokens are present (indicates prompt caching is working)
+	if cachedTokens > 0 {
+		var err error
+		responseJSON, err = sjson.Set(responseJSON, "usage.cache_read_input_tokens", cachedTokens)
+		if err != nil {
+			log.Warnf("antigravity claude response: failed to set cache_read_input_tokens: %v", err)
+		}
+	}

 	contentArrayInitialized := false
 	ensureContentArray := func() {
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
@@ -13,6 +13,8 @@ import (
 	"sync/atomic"
 	"time"

+	log "github.com/sirupsen/logrus"
+
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -93,10 +95,19 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("antigravity openai response: failed to set cached_tokens: %v", err)
+			}
+		}
 	}

 	// Process the main content part of the response.
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -244,7 +244,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 					out, _ = sjson.SetRawBytes(out, "request.contents.-1", node)

 					// Append a single tool content combining name + response per function
-					toolNode := []byte(`{"role":"tool","parts":[]}`)
+					toolNode := []byte(`{"role":"user","parts":[]}`)
 					pp := 0
 					for _, fid := range fIDs {
 						if name, ok := tcID2Name[fid]; ok {
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -286,7 +286,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 					out, _ = sjson.SetRawBytes(out, "contents.-1", node)

 					// Append a single tool content combining name + response per function
-					toolNode := []byte(`{"role":"tool","parts":[]}`)
+					toolNode := []byte(`{"role":"user","parts":[]}`)
 					pp := 0
 					for _, fid := range fIDs {
 						if name, ok := tcID2Name[fid]; ok {
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -13,6 +13,7 @@ import (
 	"sync/atomic"
 	"time"

+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -96,10 +97,19 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("gemini openai response: failed to set cached_tokens in streaming: %v", err)
+			}
+		}
 	}

 	// Process the main content part of the response.
@@ -240,10 +250,19 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("gemini openai response: failed to set cached_tokens in non-streaming: %v", err)
+			}
+		}
 	}

 	// Process the main content part of the response.
--- a/internal/translator/kiro/common/message_merge.go
+++ b/internal/translator/kiro/common/message_merge.go
@@ -10,6 +10,7 @@ import (
 // MergeAdjacentMessages merges adjacent messages with the same role.
 // This reduces API call complexity and improves compatibility.
 // Based on AIClient-2-API implementation.
+// NOTE: Tool messages are NOT merged because each has a unique tool_call_id that must be preserved.
 func MergeAdjacentMessages(messages []gjson.Result) []gjson.Result {
 	if len(messages) <= 1 {
 		return messages
@@ -26,6 +27,12 @@ func MergeAdjacentMessages(messages []gjson.Result) []gjson.Result {
 		currentRole := msg.Get("role").String()
 		lastRole := lastMsg.Get("role").String()

+		// Don't merge tool messages - each has a unique tool_call_id
+		if currentRole == "tool" || lastRole == "tool" {
+			merged = append(merged, msg)
+			continue
+		}
+
 		if currentRole == lastRole {
 			// Merge content from current message into last message
 			mergedContent := mergeMessageContent(lastMsg, msg)
--- a/internal/translator/kiro/openai/kiro_openai_request.go
+++ b/internal/translator/kiro/openai/kiro_openai_request.go
@@ -450,24 +450,10 @@ func processOpenAIMessages(messages gjson.Result, modelID, origin string) ([]Kir
 	// Merge adjacent messages with the same role
 	messagesArray := kirocommon.MergeAdjacentMessages(messages.Array())

-	// Build tool_call_id to name mapping from assistant messages
-	toolCallIDToName := make(map[string]string)
-	for _, msg := range messagesArray {
-		if msg.Get("role").String() == "assistant" {
-			toolCalls := msg.Get("tool_calls")
-			if toolCalls.IsArray() {
-				for _, tc := range toolCalls.Array() {
-					if tc.Get("type").String() == "function" {
-						id := tc.Get("id").String()
-						name := tc.Get("function.name").String()
-						if id != "" && name != "" {
-							toolCallIDToName[id] = name
-						}
-					}
-				}
-			}
-		}
-	}
+	// Track pending tool results that should be attached to the next user message
+	// This is critical for LiteLLM-translated requests where tool results appear
+	// as separate "tool" role messages between assistant and user messages
+	var pendingToolResults []KiroToolResult

 	for i, msg := range messagesArray {
 		role := msg.Get("role").String()
@@ -480,6 +466,10 @@ func processOpenAIMessages(messages gjson.Result, modelID, origin string) ([]Kir

 		case "user":
 			userMsg, toolResults := buildUserMessageFromOpenAI(msg, modelID, origin)
+			// Merge any pending tool results from preceding "tool" role messages
+			toolResults = append(pendingToolResults, toolResults...)
+			pendingToolResults = nil // Reset pending tool results
+
 			if isLastMessage {
 				currentUserMsg = &userMsg
 				currentToolResults = toolResults
@@ -505,6 +495,24 @@ func processOpenAIMessages(messages gjson.Result, modelID, origin string) ([]Kir

 		case "assistant":
 			assistantMsg := buildAssistantMessageFromOpenAI(msg)
+
+			// If there are pending tool results, we need to insert a synthetic user message
+			// before this assistant message to maintain proper conversation structure
+			if len(pendingToolResults) > 0 {
+				syntheticUserMsg := KiroUserInputMessage{
+					Content: "Tool results provided.",
+					ModelID: modelID,
+					Origin:  origin,
+					UserInputMessageContext: &KiroUserInputMessageContext{
+						ToolResults: pendingToolResults,
+					},
+				}
+				history = append(history, KiroHistoryMessage{
+					UserInputMessage: &syntheticUserMsg,
+				})
+				pendingToolResults = nil
+			}
+
 			if isLastMessage {
 				history = append(history, KiroHistoryMessage{
 					AssistantResponseMessage: &assistantMsg,
@@ -524,7 +532,7 @@ func processOpenAIMessages(messages gjson.Result, modelID, origin string) ([]Kir
 		case "tool":
 			// Tool messages in OpenAI format provide results for tool_calls
 			// These are typically followed by user or assistant messages
-			// Process them and merge into the next user message's tool results
+			// Collect them as pending and attach to the next user message
 			toolCallID := msg.Get("tool_call_id").String()
 			content := msg.Get("content").String()

@@ -534,9 +542,21 @@ func processOpenAIMessages(messages gjson.Result, modelID, origin string) ([]Kir
 					Content:   []KiroTextContent{{Text: content}},
 					Status:    "success",
 				}
-				// Tool results should be included in the next user message
-				// For now, collect them and they'll be handled when we build the current message
-				currentToolResults = append(currentToolResults, toolResult)
+				// Collect pending tool results to attach to the next user message
+				pendingToolResults = append(pendingToolResults, toolResult)
+			}
+		}
+	}
+
+	// Handle case where tool results are at the end with no following user message
+	if len(pendingToolResults) > 0 {
+		currentToolResults = append(currentToolResults, pendingToolResults...)
+		// If there's no current user message, create a synthetic one for the tool results
+		if currentUserMsg == nil {
+			currentUserMsg = &KiroUserInputMessage{
+				Content: "Tool results provided.",
+				ModelID: modelID,
+				Origin:  origin,
 			}
 		}
 	}
@@ -551,9 +571,6 @@ func buildUserMessageFromOpenAI(msg gjson.Result, modelID, origin string) (KiroU
 	var toolResults []KiroToolResult
 	var images []KiroImage

-	// Track seen toolCallIds to deduplicate
-	seenToolCallIDs := make(map[string]bool)
-
 	if content.IsArray() {
 		for _, part := range content.Array() {
 			partType := part.Get("type").String()
@@ -589,9 +606,6 @@ func buildUserMessageFromOpenAI(msg gjson.Result, modelID, origin string) (KiroU
 		contentBuilder.WriteString(content.String())
 	}

-	// Check for tool_calls in the message (shouldn't be in user messages, but handle edge cases)
-	_ = seenToolCallIDs // Used for deduplication if needed
-
 	userMsg := KiroUserInputMessage{
 		Content: contentBuilder.String(),
 		ModelID: modelID,
--- a/internal/translator/kiro/openai/kiro_openai_request_test.go
+++ b/internal/translator/kiro/openai/kiro_openai_request_test.go
@@ -0,0 +1,386 @@
+package openai
+
+import (
+	"encoding/json"
+	"testing"
+)
+
+// TestToolResultsAttachedToCurrentMessage verifies that tool results from "tool" role messages
+// are properly attached to the current user message (the last message in the conversation).
+// This is critical for LiteLLM-translated requests where tool results appear as separate messages.
+func TestToolResultsAttachedToCurrentMessage(t *testing.T) {
+	// OpenAI format request simulating LiteLLM's translation from Anthropic format
+	// Sequence: user -> assistant (with tool_calls) -> tool (result) -> user
+	// The last user message should have the tool results attached
+	input := []byte(`{
+		"model": "kiro-claude-opus-4-5-agentic",
+		"messages": [
+			{"role": "user", "content": "Hello, can you read a file for me?"},
+			{
+				"role": "assistant",
+				"content": "I'll read that file for you.",
+				"tool_calls": [
+					{
+						"id": "call_abc123",
+						"type": "function",
+						"function": {
+							"name": "Read",
+							"arguments": "{\"file_path\": \"/tmp/test.txt\"}"
+						}
+					}
+				]
+			},
+			{
+				"role": "tool",
+				"tool_call_id": "call_abc123",
+				"content": "File contents: Hello World!"
+			},
+			{"role": "user", "content": "What did the file say?"}
+		]
+	}`)
+
+	result, _ := BuildKiroPayloadFromOpenAI(input, "kiro-model", "", "CLI", false, false, nil, nil)
+
+	var payload KiroPayload
+	if err := json.Unmarshal(result, &payload); err != nil {
+		t.Fatalf("Failed to unmarshal result: %v", err)
+	}
+
+	// The last user message becomes currentMessage
+	// History should have: user (first), assistant (with tool_calls)
+	t.Logf("History count: %d", len(payload.ConversationState.History))
+	if len(payload.ConversationState.History) != 2 {
+		t.Errorf("Expected 2 history entries (user + assistant), got %d", len(payload.ConversationState.History))
+	}
+
+	// Tool results should be attached to currentMessage (the last user message)
+	ctx := payload.ConversationState.CurrentMessage.UserInputMessage.UserInputMessageContext
+	if ctx == nil {
+		t.Fatal("Expected currentMessage to have UserInputMessageContext with tool results")
+	}
+
+	if len(ctx.ToolResults) != 1 {
+		t.Fatalf("Expected 1 tool result in currentMessage, got %d", len(ctx.ToolResults))
+	}
+
+	tr := ctx.ToolResults[0]
+	if tr.ToolUseID != "call_abc123" {
+		t.Errorf("Expected toolUseId 'call_abc123', got '%s'", tr.ToolUseID)
+	}
+	if len(tr.Content) == 0 || tr.Content[0].Text != "File contents: Hello World!" {
+		t.Errorf("Tool result content mismatch, got: %+v", tr.Content)
+	}
+}
+
+// TestToolResultsInHistoryUserMessage verifies that when there are multiple user messages
+// after tool results, the tool results are attached to the correct user message in history.
+func TestToolResultsInHistoryUserMessage(t *testing.T) {
+	// Sequence: user -> assistant (with tool_calls) -> tool (result) -> user -> assistant -> user
+	// The first user after tool should have tool results in history
+	input := []byte(`{
+		"model": "kiro-claude-opus-4-5-agentic",
+		"messages": [
+			{"role": "user", "content": "Hello"},
+			{
+				"role": "assistant",
+				"content": "I'll read the file.",
+				"tool_calls": [
+					{
+						"id": "call_1",
+						"type": "function",
+						"function": {
+							"name": "Read",
+							"arguments": "{}"
+						}
+					}
+				]
+			},
+			{
+				"role": "tool",
+				"tool_call_id": "call_1",
+				"content": "File result"
+			},
+			{"role": "user", "content": "Thanks for the file"},
+			{"role": "assistant", "content": "You're welcome"},
+			{"role": "user", "content": "Bye"}
+		]
+	}`)
+
+	result, _ := BuildKiroPayloadFromOpenAI(input, "kiro-model", "", "CLI", false, false, nil, nil)
+
+	var payload KiroPayload
+	if err := json.Unmarshal(result, &payload); err != nil {
+		t.Fatalf("Failed to unmarshal result: %v", err)
+	}
+
+	// History should have: user, assistant, user (with tool results), assistant
+	// CurrentMessage should be: last user "Bye"
+	t.Logf("History count: %d", len(payload.ConversationState.History))
+
+	// Find the user message in history with tool results
+	foundToolResults := false
+	for i, h := range payload.ConversationState.History {
+		if h.UserInputMessage != nil {
+			t.Logf("History[%d]: user message content=%q", i, h.UserInputMessage.Content)
+			if h.UserInputMessage.UserInputMessageContext != nil {
+				if len(h.UserInputMessage.UserInputMessageContext.ToolResults) > 0 {
+					foundToolResults = true
+					t.Logf("  Found %d tool results", len(h.UserInputMessage.UserInputMessageContext.ToolResults))
+					tr := h.UserInputMessage.UserInputMessageContext.ToolResults[0]
+					if tr.ToolUseID != "call_1" {
+						t.Errorf("Expected toolUseId 'call_1', got '%s'", tr.ToolUseID)
+					}
+				}
+			}
+		}
+		if h.AssistantResponseMessage != nil {
+			t.Logf("History[%d]: assistant message content=%q", i, h.AssistantResponseMessage.Content)
+		}
+	}
+
+	if !foundToolResults {
+		t.Error("Tool results were not attached to any user message in history")
+	}
+}
+
+// TestToolResultsWithMultipleToolCalls verifies handling of multiple tool calls
+func TestToolResultsWithMultipleToolCalls(t *testing.T) {
+	input := []byte(`{
+		"model": "kiro-claude-opus-4-5-agentic",
+		"messages": [
+			{"role": "user", "content": "Read two files for me"},
+			{
+				"role": "assistant",
+				"content": "I'll read both files.",
+				"tool_calls": [
+					{
+						"id": "call_1",
+						"type": "function",
+						"function": {
+							"name": "Read",
+							"arguments": "{\"file_path\": \"/tmp/file1.txt\"}"
+						}
+					},
+					{
+						"id": "call_2",
+						"type": "function",
+						"function": {
+							"name": "Read",
+							"arguments": "{\"file_path\": \"/tmp/file2.txt\"}"
+						}
+					}
+				]
+			},
+			{
+				"role": "tool",
+				"tool_call_id": "call_1",
+				"content": "Content of file 1"
+			},
+			{
+				"role": "tool",
+				"tool_call_id": "call_2",
+				"content": "Content of file 2"
+			},
+			{"role": "user", "content": "What do they say?"}
+		]
+	}`)
+
+	result, _ := BuildKiroPayloadFromOpenAI(input, "kiro-model", "", "CLI", false, false, nil, nil)
+
+	var payload KiroPayload
+	if err := json.Unmarshal(result, &payload); err != nil {
+		t.Fatalf("Failed to unmarshal result: %v", err)
+	}
+
+	t.Logf("History count: %d", len(payload.ConversationState.History))
+	t.Logf("CurrentMessage content: %q", payload.ConversationState.CurrentMessage.UserInputMessage.Content)
+
+	// Check if there are any tool results anywhere
+	var totalToolResults int
+	for i, h := range payload.ConversationState.History {
+		if h.UserInputMessage != nil && h.UserInputMessage.UserInputMessageContext != nil {
+			count := len(h.UserInputMessage.UserInputMessageContext.ToolResults)
+			t.Logf("History[%d] user message has %d tool results", i, count)
+			totalToolResults += count
+		}
+	}
+
+	ctx := payload.ConversationState.CurrentMessage.UserInputMessage.UserInputMessageContext
+	if ctx != nil {
+		t.Logf("CurrentMessage has %d tool results", len(ctx.ToolResults))
+		totalToolResults += len(ctx.ToolResults)
+	} else {
+		t.Logf("CurrentMessage has no UserInputMessageContext")
+	}
+
+	if totalToolResults != 2 {
+		t.Errorf("Expected 2 tool results total, got %d", totalToolResults)
+	}
+}
+
+// TestToolResultsAtEndOfConversation verifies tool results are handled when
+// the conversation ends with tool results (no following user message)
+func TestToolResultsAtEndOfConversation(t *testing.T) {
+	input := []byte(`{
+		"model": "kiro-claude-opus-4-5-agentic",
+		"messages": [
+			{"role": "user", "content": "Read a file"},
+			{
+				"role": "assistant",
+				"content": "Reading the file.",
+				"tool_calls": [
+					{
+						"id": "call_end",
+						"type": "function",
+						"function": {
+							"name": "Read",
+							"arguments": "{\"file_path\": \"/tmp/test.txt\"}"
+						}
+					}
+				]
+			},
+			{
+				"role": "tool",
+				"tool_call_id": "call_end",
+				"content": "File contents here"
+			}
+		]
+	}`)
+
+	result, _ := BuildKiroPayloadFromOpenAI(input, "kiro-model", "", "CLI", false, false, nil, nil)
+
+	var payload KiroPayload
+	if err := json.Unmarshal(result, &payload); err != nil {
+		t.Fatalf("Failed to unmarshal result: %v", err)
+	}
+
+	// When the last message is a tool result, a synthetic user message is created
+	// and tool results should be attached to it
+	ctx := payload.ConversationState.CurrentMessage.UserInputMessage.UserInputMessageContext
+	if ctx == nil || len(ctx.ToolResults) == 0 {
+		t.Error("Expected tool results to be attached to current message when conversation ends with tool result")
+	} else {
+		if ctx.ToolResults[0].ToolUseID != "call_end" {
+			t.Errorf("Expected toolUseId 'call_end', got '%s'", ctx.ToolResults[0].ToolUseID)
+		}
+	}
+}
+
+// TestToolResultsFollowedByAssistant verifies handling when tool results are followed
+// by an assistant message (no intermediate user message).
+// This is the pattern from LiteLLM translation of Anthropic format where:
+// user message has ONLY tool_result blocks -> LiteLLM creates tool messages
+// then the next message is assistant
+func TestToolResultsFollowedByAssistant(t *testing.T) {
+	// Sequence: user -> assistant (with tool_calls) -> tool -> tool -> assistant -> user
+	// This simulates LiteLLM's translation of:
+	//   user: "Read files"
+	//   assistant: [tool_use, tool_use]
+	//   user: [tool_result, tool_result]  <- becomes multiple "tool" role messages
+	//   assistant: "I've read them"
+	//   user: "What did they say?"
+	input := []byte(`{
+		"model": "kiro-claude-opus-4-5-agentic",
+		"messages": [
+			{"role": "user", "content": "Read two files for me"},
+			{
+				"role": "assistant",
+				"content": "I'll read both files.",
+				"tool_calls": [
+					{
+						"id": "call_1",
+						"type": "function",
+						"function": {
+							"name": "Read",
+							"arguments": "{\"file_path\": \"/tmp/a.txt\"}"
+						}
+					},
+					{
+						"id": "call_2",
+						"type": "function",
+						"function": {
+							"name": "Read",
+							"arguments": "{\"file_path\": \"/tmp/b.txt\"}"
+						}
+					}
+				]
+			},
+			{
+				"role": "tool",
+				"tool_call_id": "call_1",
+				"content": "Contents of file A"
+			},
+			{
+				"role": "tool",
+				"tool_call_id": "call_2",
+				"content": "Contents of file B"
+			},
+			{
+				"role": "assistant",
+				"content": "I've read both files."
+			},
+			{"role": "user", "content": "What did they say?"}
+		]
+	}`)
+
+	result, _ := BuildKiroPayloadFromOpenAI(input, "kiro-model", "", "CLI", false, false, nil, nil)
+
+	var payload KiroPayload
+	if err := json.Unmarshal(result, &payload); err != nil {
+		t.Fatalf("Failed to unmarshal result: %v", err)
+	}
+
+	t.Logf("History count: %d", len(payload.ConversationState.History))
+
+	// Tool results should be attached to a synthetic user message or the history should be valid
+	var totalToolResults int
+	for i, h := range payload.ConversationState.History {
+		if h.UserInputMessage != nil {
+			t.Logf("History[%d]: user message content=%q", i, h.UserInputMessage.Content)
+			if h.UserInputMessage.UserInputMessageContext != nil {
+				count := len(h.UserInputMessage.UserInputMessageContext.ToolResults)
+				t.Logf("  Has %d tool results", count)
+				totalToolResults += count
+			}
+		}
+		if h.AssistantResponseMessage != nil {
+			t.Logf("History[%d]: assistant message content=%q", i, h.AssistantResponseMessage.Content)
+		}
+	}
+
+	ctx := payload.ConversationState.CurrentMessage.UserInputMessage.UserInputMessageContext
+	if ctx != nil {
+		t.Logf("CurrentMessage has %d tool results", len(ctx.ToolResults))
+		totalToolResults += len(ctx.ToolResults)
+	}
+
+	if totalToolResults != 2 {
+		t.Errorf("Expected 2 tool results total, got %d", totalToolResults)
+	}
+}
+
+// TestAssistantEndsConversation verifies handling when assistant is the last message
+func TestAssistantEndsConversation(t *testing.T) {
+	input := []byte(`{
+		"model": "kiro-claude-opus-4-5-agentic",
+		"messages": [
+			{"role": "user", "content": "Hello"},
+			{
+				"role": "assistant",
+				"content": "Hi there!"
+			}
+		]
+	}`)
+
+	result, _ := BuildKiroPayloadFromOpenAI(input, "kiro-model", "", "CLI", false, false, nil, nil)
+
+	var payload KiroPayload
+	if err := json.Unmarshal(result, &payload); err != nil {
+		t.Fatalf("Failed to unmarshal result: %v", err)
+	}
+
+	// When assistant is last, a "Continue" user message should be created
+	if payload.ConversationState.CurrentMessage.UserInputMessage.Content == "" {
+		t.Error("Expected a 'Continue' message to be created when assistant is last")
+	}
+}
--- a/sdk/api/handlers/claude/code_handlers.go
+++ b/sdk/api/handlers/claude/code_handlers.go
@@ -14,7 +14,6 @@ import (
 	"fmt"
 	"io"
 	"net/http"
-	"time"

 	"github.com/gin-gonic/gin"
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
@@ -185,14 +184,6 @@ func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSO
 //   - c: The Gin context for the request.
 //   - rawJSON: The raw JSON request body.
 func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byte) {
-	// Set up Server-Sent Events (SSE) headers for streaming response
-	// These headers are essential for maintaining a persistent connection
-	// and enabling real-time streaming of chat completions
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
 	// Get the http.Flusher interface to manually flush the response.
 	// This is crucial for streaming as it allows immediate sending of data chunks
 	flusher, ok := c.Writer.(http.Flusher)
@@ -213,58 +204,82 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())

 	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
-	h.forwardClaudeStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
-	return
-}
+	setSSEHeaders := func() {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}

-func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
-	// OpenAI-style stream forwarding: write each SSE chunk and flush immediately.
-	// This guarantees clients see incremental output even for small responses.
+	// Peek at the first chunk to determine success or failure before setting headers
 	for {
 		select {
 		case <-c.Request.Context().Done():
-			cancel(c.Request.Context().Err())
+			cliCancel(c.Request.Context().Err())
 			return
-
-		case chunk, ok := <-data:
+		case errMsg, ok := <-errChan:
 			if !ok {
+				// Err channel closed cleanly; wait for data channel.
+				errChan = nil
+				continue
+			}
+			// Upstream failed immediately. Return proper error status and JSON.
+			h.WriteErrorResponse(c, errMsg)
+			if errMsg != nil {
+				cliCancel(errMsg.Error)
+			} else {
+				cliCancel(nil)
+			}
+			return
+		case chunk, ok := <-dataChan:
+			if !ok {
+				// Stream closed without data? Send DONE or just headers.
+				setSSEHeaders()
 				flusher.Flush()
-				cancel(nil)
+				cliCancel(nil)
 				return
 			}
+
+			// Success! Set headers now.
+			setSSEHeaders()
+
+			// Write the first chunk
 			if len(chunk) > 0 {
 				_, _ = c.Writer.Write(chunk)
 				flusher.Flush()
 			}

-		case errMsg, ok := <-errs:
-			if !ok {
-				continue
-			}
-			if errMsg != nil {
-				status := http.StatusInternalServerError
-				if errMsg.StatusCode > 0 {
-					status = errMsg.StatusCode
-				}
-				c.Status(status)
-
-				// An error occurred: emit as a proper SSE error event
-				errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
-				_, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errorBytes)
-				flusher.Flush()
-			}
-
-			var execErr error
-			if errMsg != nil {
-				execErr = errMsg.Error
-			}
-			cancel(execErr)
+			// Continue streaming the rest
+			h.forwardClaudeStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
 			return
-		case <-time.After(500 * time.Millisecond):
 		}
 	}
 }

+func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
+	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
+		WriteChunk: func(chunk []byte) {
+			if len(chunk) == 0 {
+				return
+			}
+			_, _ = c.Writer.Write(chunk)
+		},
+		WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
+			if errMsg == nil {
+				return
+			}
+			status := http.StatusInternalServerError
+			if errMsg.StatusCode > 0 {
+				status = errMsg.StatusCode
+			}
+			c.Status(status)
+
+			errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
+			_, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errorBytes)
+		},
+	})
+}
+
 type claudeErrorDetail struct {
 	Type    string `json:"type"`
 	Message string `json:"message"`
--- a/sdk/api/handlers/gemini/gemini-cli_handlers.go
+++ b/sdk/api/handlers/gemini/gemini-cli_handlers.go
@@ -182,19 +182,18 @@ func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJ
 }

 func (h *GeminiCLIAPIHandler) forwardCLIStream(c *gin.Context, flusher http.Flusher, alt string, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
-	for {
-		select {
-		case <-c.Request.Context().Done():
-			cancel(c.Request.Context().Err())
-			return
-		case chunk, ok := <-data:
-			if !ok {
-				cancel(nil)
-				return
-			}
+	var keepAliveInterval *time.Duration
+	if alt != "" {
+		disabled := time.Duration(0)
+		keepAliveInterval = &disabled
+	}
+
+	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
+		KeepAliveInterval: keepAliveInterval,
+		WriteChunk: func(chunk []byte) {
 			if alt == "" {
 				if bytes.Equal(chunk, []byte("data: [DONE]")) || bytes.Equal(chunk, []byte("[DONE]")) {
-					continue
+					return
 				}

 				if !bytes.HasPrefix(chunk, []byte("data:")) {
@@ -206,22 +205,25 @@ func (h *GeminiCLIAPIHandler) forwardCLIStream(c *gin.Context, flusher http.Flus
 			} else {
 				_, _ = c.Writer.Write(chunk)
 			}
-			flusher.Flush()
-		case errMsg, ok := <-errs:
-			if !ok {
-				continue
+		},
+		WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
+			if errMsg == nil {
+				return
 			}
-			if errMsg != nil {
-				h.WriteErrorResponse(c, errMsg)
-				flusher.Flush()
+			status := http.StatusInternalServerError
+			if errMsg.StatusCode > 0 {
+				status = errMsg.StatusCode
 			}
-			var execErr error
-			if errMsg != nil {
-				execErr = errMsg.Error
+			errText := http.StatusText(status)
+			if errMsg.Error != nil && errMsg.Error.Error() != "" {
+				errText = errMsg.Error.Error()
 			}
-			cancel(execErr)
-			return
-		case <-time.After(500 * time.Millisecond):
-		}
-	}
+			body := handlers.BuildErrorResponseBody(status, errText)
+			if alt == "" {
+				_, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", string(body))
+			} else {
+				_, _ = c.Writer.Write(body)
+			}
+		},
+	})
 }
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -226,13 +226,6 @@ func (h *GeminiAPIHandler) GeminiHandler(c *gin.Context) {
 func (h *GeminiAPIHandler) handleStreamGenerateContent(c *gin.Context, modelName string, rawJSON []byte) {
 	alt := h.GetAlt(c)

-	if alt == "" {
-		c.Header("Content-Type", "text/event-stream")
-		c.Header("Cache-Control", "no-cache")
-		c.Header("Connection", "keep-alive")
-		c.Header("Access-Control-Allow-Origin", "*")
-	}
-
 	// Get the http.Flusher interface to manually flush the response.
 	flusher, ok := c.Writer.(http.Flusher)
 	if !ok {
@@ -247,8 +240,65 @@ func (h *GeminiAPIHandler) handleStreamGenerateContent(c *gin.Context, modelName

 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
 	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
-	h.forwardGeminiStream(c, flusher, alt, func(err error) { cliCancel(err) }, dataChan, errChan)
-	return
+
+	setSSEHeaders := func() {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}
+
+	// Peek at the first chunk
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cliCancel(c.Request.Context().Err())
+			return
+		case errMsg, ok := <-errChan:
+			if !ok {
+				// Err channel closed cleanly; wait for data channel.
+				errChan = nil
+				continue
+			}
+			// Upstream failed immediately. Return proper error status and JSON.
+			h.WriteErrorResponse(c, errMsg)
+			if errMsg != nil {
+				cliCancel(errMsg.Error)
+			} else {
+				cliCancel(nil)
+			}
+			return
+		case chunk, ok := <-dataChan:
+			if !ok {
+				// Closed without data
+				if alt == "" {
+					setSSEHeaders()
+				}
+				flusher.Flush()
+				cliCancel(nil)
+				return
+			}
+
+			// Success! Set headers.
+			if alt == "" {
+				setSSEHeaders()
+			}
+
+			// Write first chunk
+			if alt == "" {
+				_, _ = c.Writer.Write([]byte("data: "))
+				_, _ = c.Writer.Write(chunk)
+				_, _ = c.Writer.Write([]byte("\n\n"))
+			} else {
+				_, _ = c.Writer.Write(chunk)
+			}
+			flusher.Flush()
+
+			// Continue
+			h.forwardGeminiStream(c, flusher, alt, func(err error) { cliCancel(err) }, dataChan, errChan)
+			return
+		}
+	}
 }

 // handleCountTokens handles token counting requests for Gemini models.
@@ -297,16 +347,15 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin
 }

 func (h *GeminiAPIHandler) forwardGeminiStream(c *gin.Context, flusher http.Flusher, alt string, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
-	for {
-		select {
-		case <-c.Request.Context().Done():
-			cancel(c.Request.Context().Err())
-			return
-		case chunk, ok := <-data:
-			if !ok {
-				cancel(nil)
-				return
-			}
+	var keepAliveInterval *time.Duration
+	if alt != "" {
+		disabled := time.Duration(0)
+		keepAliveInterval = &disabled
+	}
+
+	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
+		KeepAliveInterval: keepAliveInterval,
+		WriteChunk: func(chunk []byte) {
 			if alt == "" {
 				_, _ = c.Writer.Write([]byte("data: "))
 				_, _ = c.Writer.Write(chunk)
@@ -314,22 +363,25 @@ func (h *GeminiAPIHandler) forwardGeminiStream(c *gin.Context, flusher http.Flus
 			} else {
 				_, _ = c.Writer.Write(chunk)
 			}
-			flusher.Flush()
-		case errMsg, ok := <-errs:
-			if !ok {
-				continue
+		},
+		WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
+			if errMsg == nil {
+				return
 			}
-			if errMsg != nil {
-				h.WriteErrorResponse(c, errMsg)
-				flusher.Flush()
+			status := http.StatusInternalServerError
+			if errMsg.StatusCode > 0 {
+				status = errMsg.StatusCode
 			}
-			var execErr error
-			if errMsg != nil {
-				execErr = errMsg.Error
+			errText := http.StatusText(status)
+			if errMsg.Error != nil && errMsg.Error.Error() != "" {
+				errText = errMsg.Error.Error()
 			}
-			cancel(execErr)
-			return
-		case <-time.After(500 * time.Millisecond):
-		}
-	}
+			body := handlers.BuildErrorResponseBody(status, errText)
+			if alt == "" {
+				_, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", string(body))
+			} else {
+				_, _ = c.Writer.Write(body)
+			}
+		},
+	})
 }
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -9,9 +9,12 @@ import (
 	"fmt"
 	"net/http"
 	"strings"
+	"time"

 	"github.com/gin-gonic/gin"
+	"github.com/google/uuid"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -40,6 +43,117 @@ type ErrorDetail struct {
 	Code string `json:"code,omitempty"`
 }

+const idempotencyKeyMetadataKey = "idempotency_key"
+
+const (
+	defaultStreamingKeepAliveSeconds = 0
+	defaultStreamingBootstrapRetries = 0
+)
+
+// BuildErrorResponseBody builds an OpenAI-compatible JSON error response body.
+// If errText is already valid JSON, it is returned as-is to preserve upstream error payloads.
+func BuildErrorResponseBody(status int, errText string) []byte {
+	if status <= 0 {
+		status = http.StatusInternalServerError
+	}
+	if strings.TrimSpace(errText) == "" {
+		errText = http.StatusText(status)
+	}
+
+	trimmed := strings.TrimSpace(errText)
+	if trimmed != "" && json.Valid([]byte(trimmed)) {
+		return []byte(trimmed)
+	}
+
+	errType := "invalid_request_error"
+	var code string
+	switch status {
+	case http.StatusUnauthorized:
+		errType = "authentication_error"
+		code = "invalid_api_key"
+	case http.StatusForbidden:
+		errType = "permission_error"
+		code = "insufficient_quota"
+	case http.StatusTooManyRequests:
+		errType = "rate_limit_error"
+		code = "rate_limit_exceeded"
+	case http.StatusNotFound:
+		errType = "invalid_request_error"
+		code = "model_not_found"
+	default:
+		if status >= http.StatusInternalServerError {
+			errType = "server_error"
+			code = "internal_server_error"
+		}
+	}
+
+	payload, err := json.Marshal(ErrorResponse{
+		Error: ErrorDetail{
+			Message: errText,
+			Type:    errType,
+			Code:    code,
+		},
+	})
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error":{"message":%q,"type":"server_error","code":"internal_server_error"}}`, errText))
+	}
+	return payload
+}
+
+// StreamingKeepAliveInterval returns the SSE keep-alive interval for this server.
+// Returning 0 disables keep-alives (default when unset).
+func StreamingKeepAliveInterval(cfg *config.SDKConfig) time.Duration {
+	seconds := defaultStreamingKeepAliveSeconds
+	if cfg != nil && cfg.Streaming.KeepAliveSeconds != nil {
+		seconds = *cfg.Streaming.KeepAliveSeconds
+	}
+	if seconds <= 0 {
+		return 0
+	}
+	return time.Duration(seconds) * time.Second
+}
+
+// StreamingBootstrapRetries returns how many times a streaming request may be retried before any bytes are sent.
+func StreamingBootstrapRetries(cfg *config.SDKConfig) int {
+	retries := defaultStreamingBootstrapRetries
+	if cfg != nil && cfg.Streaming.BootstrapRetries != nil {
+		retries = *cfg.Streaming.BootstrapRetries
+	}
+	if retries < 0 {
+		retries = 0
+	}
+	return retries
+}
+
+func requestExecutionMetadata(ctx context.Context) map[string]any {
+	// Idempotency-Key is an optional client-supplied header used to correlate retries.
+	// It is forwarded as execution metadata; when absent we generate a UUID.
+	key := ""
+	if ctx != nil {
+		if ginCtx, ok := ctx.Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil {
+			key = strings.TrimSpace(ginCtx.GetHeader("Idempotency-Key"))
+		}
+	}
+	if key == "" {
+		key = uuid.NewString()
+	}
+	return map[string]any{idempotencyKeyMetadataKey: key}
+}
+
+func mergeMetadata(base, overlay map[string]any) map[string]any {
+	if len(base) == 0 && len(overlay) == 0 {
+		return nil
+	}
+	out := make(map[string]any, len(base)+len(overlay))
+	for k, v := range base {
+		out[k] = v
+	}
+	for k, v := range overlay {
+		out[k] = v
+	}
+	return out
+}
+
 // BaseAPIHandler contains the handlers for API endpoints.
 // It holds a pool of clients to interact with the backend service and manages
 // load balancing, client selection, and configuration.
@@ -104,13 +218,39 @@ func (h *BaseAPIHandler) GetAlt(c *gin.Context) string {
 // Parameters:
 //   - handler: The API handler associated with the request.
 //   - c: The Gin context of the current request.
-//   - ctx: The parent context.
+//   - ctx: The parent context (caller values/deadlines are preserved; request context adds cancellation and request ID).
 //
 // Returns:
 //   - context.Context: The new context with cancellation and embedded values.
 //   - APIHandlerCancelFunc: A function to cancel the context and log the response.
 func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *gin.Context, ctx context.Context) (context.Context, APIHandlerCancelFunc) {
-	newCtx, cancel := context.WithCancel(ctx)
+	parentCtx := ctx
+	if parentCtx == nil {
+		parentCtx = context.Background()
+	}
+
+	var requestCtx context.Context
+	if c != nil && c.Request != nil {
+		requestCtx = c.Request.Context()
+	}
+
+	if requestCtx != nil && logging.GetRequestID(parentCtx) == "" {
+		if requestID := logging.GetRequestID(requestCtx); requestID != "" {
+			parentCtx = logging.WithRequestID(parentCtx, requestID)
+		} else if requestID := logging.GetGinRequestID(c); requestID != "" {
+			parentCtx = logging.WithRequestID(parentCtx, requestID)
+		}
+	}
+	newCtx, cancel := context.WithCancel(parentCtx)
+	if requestCtx != nil && requestCtx != parentCtx {
+		go func() {
+			select {
+			case <-requestCtx.Done():
+				cancel()
+			case <-newCtx.Done():
+			}
+		}()
+	}
 	newCtx = context.WithValue(newCtx, "gin", c)
 	newCtx = context.WithValue(newCtx, "handler", handler)
 	return newCtx, func(params ...interface{}) {
@@ -183,6 +323,7 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 	if errMsg != nil {
 		return nil, errMsg
 	}
+	reqMeta := requestExecutionMetadata(ctx)
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
@@ -196,9 +337,7 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
-	if cloned := cloneMetadata(metadata); cloned != nil {
-		opts.Metadata = cloned
-	}
+	opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
 	resp, err := h.AuthManager.Execute(ctx, providers, req, opts)
 	if err != nil {
 		status := http.StatusInternalServerError
@@ -225,6 +364,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 	if errMsg != nil {
 		return nil, errMsg
 	}
+	reqMeta := requestExecutionMetadata(ctx)
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
@@ -238,9 +378,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
-	if cloned := cloneMetadata(metadata); cloned != nil {
-		opts.Metadata = cloned
-	}
+	opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
 	resp, err := h.AuthManager.ExecuteCount(ctx, providers, req, opts)
 	if err != nil {
 		status := http.StatusInternalServerError
@@ -270,6 +408,7 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 		close(errChan)
 		return nil, errChan
 	}
+	reqMeta := requestExecutionMetadata(ctx)
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
 		Payload: cloneBytes(rawJSON),
@@ -283,9 +422,7 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
-	if cloned := cloneMetadata(metadata); cloned != nil {
-		opts.Metadata = cloned
-	}
+	opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta)
 	chunks, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
 	if err != nil {
 		errChan := make(chan *interfaces.ErrorMessage, 1)
@@ -310,31 +447,94 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 	go func() {
 		defer close(dataChan)
 		defer close(errChan)
-		for chunk := range chunks {
-			if chunk.Err != nil {
-				status := http.StatusInternalServerError
-				if se, ok := chunk.Err.(interface{ StatusCode() int }); ok && se != nil {
-					if code := se.StatusCode(); code > 0 {
-						status = code
-					}
-				}
-				var addon http.Header
-				if he, ok := chunk.Err.(interface{ Headers() http.Header }); ok && he != nil {
-					if hdr := he.Headers(); hdr != nil {
-						addon = hdr.Clone()
-					}
-				}
-				errChan <- &interfaces.ErrorMessage{StatusCode: status, Error: chunk.Err, Addon: addon}
-				return
+		sentPayload := false
+		bootstrapRetries := 0
+		maxBootstrapRetries := StreamingBootstrapRetries(h.Cfg)
+
+		bootstrapEligible := func(err error) bool {
+			status := statusFromError(err)
+			if status == 0 {
+				return true
 			}
-			if len(chunk.Payload) > 0 {
-				dataChan <- cloneBytes(chunk.Payload)
+			switch status {
+			case http.StatusUnauthorized, http.StatusForbidden, http.StatusPaymentRequired,
+				http.StatusRequestTimeout, http.StatusTooManyRequests:
+				return true
+			default:
+				return status >= http.StatusInternalServerError
+			}
+		}
+
+	outer:
+		for {
+			for {
+				var chunk coreexecutor.StreamChunk
+				var ok bool
+				if ctx != nil {
+					select {
+					case <-ctx.Done():
+						return
+					case chunk, ok = <-chunks:
+					}
+				} else {
+					chunk, ok = <-chunks
+				}
+				if !ok {
+					return
+				}
+				if chunk.Err != nil {
+					streamErr := chunk.Err
+					// Safe bootstrap recovery: if the upstream fails before any payload bytes are sent,
+					// retry a few times (to allow auth rotation / transient recovery) and then attempt model fallback.
+					if !sentPayload {
+						if bootstrapRetries < maxBootstrapRetries && bootstrapEligible(streamErr) {
+							bootstrapRetries++
+							retryChunks, retryErr := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
+							if retryErr == nil {
+								chunks = retryChunks
+								continue outer
+							}
+							streamErr = retryErr
+						}
+					}
+
+					status := http.StatusInternalServerError
+					if se, ok := streamErr.(interface{ StatusCode() int }); ok && se != nil {
+						if code := se.StatusCode(); code > 0 {
+							status = code
+						}
+					}
+					var addon http.Header
+					if he, ok := streamErr.(interface{ Headers() http.Header }); ok && he != nil {
+						if hdr := he.Headers(); hdr != nil {
+							addon = hdr.Clone()
+						}
+					}
+					errChan <- &interfaces.ErrorMessage{StatusCode: status, Error: streamErr, Addon: addon}
+					return
+				}
+				if len(chunk.Payload) > 0 {
+					sentPayload = true
+					dataChan <- cloneBytes(chunk.Payload)
+				}
 			}
 		}
 	}()
 	return dataChan, errChan
 }

+func statusFromError(err error) int {
+	if err == nil {
+		return 0
+	}
+	if se, ok := err.(interface{ StatusCode() int }); ok && se != nil {
+		if code := se.StatusCode(); code > 0 {
+			return code
+		}
+	}
+	return 0
+}
+
 func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, metadata map[string]any, err *interfaces.ErrorMessage) {
 	// Resolve "auto" model to an actual available model first
 	resolvedModelName := util.ResolveAutoModel(modelName)
@@ -418,38 +618,7 @@ func (h *BaseAPIHandler) WriteErrorResponse(c *gin.Context, msg *interfaces.Erro
 		}
 	}

-	// Prefer preserving upstream JSON error bodies when possible.
-	buildJSONBody := func() []byte {
-		trimmed := strings.TrimSpace(errText)
-		if trimmed != "" && json.Valid([]byte(trimmed)) {
-			return []byte(trimmed)
-		}
-		errType := "invalid_request_error"
-		switch status {
-		case http.StatusUnauthorized:
-			errType = "authentication_error"
-		case http.StatusForbidden:
-			errType = "permission_error"
-		case http.StatusTooManyRequests:
-			errType = "rate_limit_error"
-		default:
-			if status >= http.StatusInternalServerError {
-				errType = "server_error"
-			}
-		}
-		payload, err := json.Marshal(ErrorResponse{
-			Error: ErrorDetail{
-				Message: errText,
-				Type:    errType,
-			},
-		})
-		if err != nil {
-			return []byte(fmt.Sprintf(`{"error":{"message":%q,"type":"server_error"}}`, errText))
-		}
-		return payload
-	}
-
-	body := buildJSONBody()
+	body := BuildErrorResponseBody(status, errText)
 	c.Set("API_RESPONSE", bytes.Clone(body))

 	if !c.Writer.Written() {
--- a/sdk/api/handlers/handlers_stream_bootstrap_test.go
+++ b/sdk/api/handlers/handlers_stream_bootstrap_test.go
@@ -0,0 +1,125 @@
+package handlers
+
+import (
+	"context"
+	"net/http"
+	"sync"
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
+)
+
+type failOnceStreamExecutor struct {
+	mu    sync.Mutex
+	calls int
+}
+
+func (e *failOnceStreamExecutor) Identifier() string { return "codex" }
+
+func (e *failOnceStreamExecutor) Execute(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
+	return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "Execute not implemented"}
+}
+
+func (e *failOnceStreamExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (<-chan coreexecutor.StreamChunk, error) {
+	e.mu.Lock()
+	e.calls++
+	call := e.calls
+	e.mu.Unlock()
+
+	ch := make(chan coreexecutor.StreamChunk, 1)
+	if call == 1 {
+		ch <- coreexecutor.StreamChunk{
+			Err: &coreauth.Error{
+				Code:       "unauthorized",
+				Message:    "unauthorized",
+				Retryable:  false,
+				HTTPStatus: http.StatusUnauthorized,
+			},
+		}
+		close(ch)
+		return ch, nil
+	}
+
+	ch <- coreexecutor.StreamChunk{Payload: []byte("ok")}
+	close(ch)
+	return ch, nil
+}
+
+func (e *failOnceStreamExecutor) Refresh(ctx context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) {
+	return auth, nil
+}
+
+func (e *failOnceStreamExecutor) CountTokens(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
+	return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "CountTokens not implemented"}
+}
+
+func (e *failOnceStreamExecutor) Calls() int {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	return e.calls
+}
+
+func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
+	executor := &failOnceStreamExecutor{}
+	manager := coreauth.NewManager(nil, nil, nil)
+	manager.RegisterExecutor(executor)
+
+	auth1 := &coreauth.Auth{
+		ID:       "auth1",
+		Provider: "codex",
+		Status:   coreauth.StatusActive,
+		Metadata: map[string]any{"email": "test1@example.com"},
+	}
+	if _, err := manager.Register(context.Background(), auth1); err != nil {
+		t.Fatalf("manager.Register(auth1): %v", err)
+	}
+
+	auth2 := &coreauth.Auth{
+		ID:       "auth2",
+		Provider: "codex",
+		Status:   coreauth.StatusActive,
+		Metadata: map[string]any{"email": "test2@example.com"},
+	}
+	if _, err := manager.Register(context.Background(), auth2); err != nil {
+		t.Fatalf("manager.Register(auth2): %v", err)
+	}
+
+	registry.GetGlobalRegistry().RegisterClient(auth1.ID, auth1.Provider, []*registry.ModelInfo{{ID: "test-model"}})
+	registry.GetGlobalRegistry().RegisterClient(auth2.ID, auth2.Provider, []*registry.ModelInfo{{ID: "test-model"}})
+	t.Cleanup(func() {
+		registry.GetGlobalRegistry().UnregisterClient(auth1.ID)
+		registry.GetGlobalRegistry().UnregisterClient(auth2.ID)
+	})
+
+	bootstrapRetries := 1
+	handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
+		Streaming: sdkconfig.StreamingConfig{
+			BootstrapRetries: &bootstrapRetries,
+		},
+	}, manager, nil)
+	dataChan, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
+	if dataChan == nil || errChan == nil {
+		t.Fatalf("expected non-nil channels")
+	}
+
+	var got []byte
+	for chunk := range dataChan {
+		got = append(got, chunk...)
+	}
+
+	for msg := range errChan {
+		if msg != nil {
+			t.Fatalf("unexpected error: %+v", msg)
+		}
+	}
+
+	if string(got) != "ok" {
+		t.Fatalf("expected payload ok, got %q", string(got))
+	}
+	if executor.Calls() != 2 {
+		t.Fatalf("expected 2 stream attempts, got %d", executor.Calls())
+	}
+}
--- a/sdk/api/handlers/openai/openai_handlers.go
+++ b/sdk/api/handlers/openai/openai_handlers.go
@@ -11,7 +11,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"net/http"
-	"time"
+	"sync"

 	"github.com/gin-gonic/gin"
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
@@ -443,11 +443,6 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []
 //   - c: The Gin context containing the HTTP request and response
 //   - rawJSON: The raw JSON bytes of the OpenAI-compatible request
 func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
 	// Get the http.Flusher interface to manually flush the response.
 	flusher, ok := c.Writer.(http.Flusher)
 	if !ok {
@@ -463,7 +458,55 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt
 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
 	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, h.GetAlt(c))
-	h.handleStreamResult(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
+
+	setSSEHeaders := func() {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}
+
+	// Peek at the first chunk to determine success or failure before setting headers
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cliCancel(c.Request.Context().Err())
+			return
+		case errMsg, ok := <-errChan:
+			if !ok {
+				// Err channel closed cleanly; wait for data channel.
+				errChan = nil
+				continue
+			}
+			// Upstream failed immediately. Return proper error status and JSON.
+			h.WriteErrorResponse(c, errMsg)
+			if errMsg != nil {
+				cliCancel(errMsg.Error)
+			} else {
+				cliCancel(nil)
+			}
+			return
+		case chunk, ok := <-dataChan:
+			if !ok {
+				// Stream closed without data? Send DONE or just headers.
+				setSSEHeaders()
+				_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
+				flusher.Flush()
+				cliCancel(nil)
+				return
+			}
+
+			// Success! Commit to streaming headers.
+			setSSEHeaders()
+
+			_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(chunk))
+			flusher.Flush()
+
+			// Continue streaming the rest
+			h.handleStreamResult(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
+			return
+		}
+	}
 }

 // handleCompletionsNonStreamingResponse handles non-streaming completions responses.
@@ -500,11 +543,6 @@ func (h *OpenAIAPIHandler) handleCompletionsNonStreamingResponse(c *gin.Context,
 //   - c: The Gin context containing the HTTP request and response
 //   - rawJSON: The raw JSON bytes of the OpenAI-compatible completions request
 func (h *OpenAIAPIHandler) handleCompletionsStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
 	// Get the http.Flusher interface to manually flush the response.
 	flusher, ok := c.Writer.(http.Flusher)
 	if !ok {
@@ -524,71 +562,109 @@ func (h *OpenAIAPIHandler) handleCompletionsStreamingResponse(c *gin.Context, ra
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
 	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")

+	setSSEHeaders := func() {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}
+
+	// Peek at the first chunk
 	for {
 		select {
 		case <-c.Request.Context().Done():
 			cliCancel(c.Request.Context().Err())
 			return
-		case chunk, isOk := <-dataChan:
-			if !isOk {
+		case errMsg, ok := <-errChan:
+			if !ok {
+				// Err channel closed cleanly; wait for data channel.
+				errChan = nil
+				continue
+			}
+			h.WriteErrorResponse(c, errMsg)
+			if errMsg != nil {
+				cliCancel(errMsg.Error)
+			} else {
+				cliCancel(nil)
+			}
+			return
+		case chunk, ok := <-dataChan:
+			if !ok {
+				setSSEHeaders()
 				_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
 				flusher.Flush()
-				cliCancel()
+				cliCancel(nil)
 				return
 			}
+
+			// Success! Set headers.
+			setSSEHeaders()
+
+			// Write the first chunk
 			converted := convertChatCompletionsStreamChunkToCompletions(chunk)
 			if converted != nil {
 				_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(converted))
 				flusher.Flush()
 			}
-		case errMsg, isOk := <-errChan:
-			if !isOk {
-				continue
-			}
-			if errMsg != nil {
-				h.WriteErrorResponse(c, errMsg)
-				flusher.Flush()
-			}
-			var execErr error
-			if errMsg != nil {
-				execErr = errMsg.Error
-			}
-			cliCancel(execErr)
+
+			done := make(chan struct{})
+			var doneOnce sync.Once
+			stop := func() { doneOnce.Do(func() { close(done) }) }
+
+			convertedChan := make(chan []byte)
+			go func() {
+				defer close(convertedChan)
+				for {
+					select {
+					case <-done:
+						return
+					case chunk, ok := <-dataChan:
+						if !ok {
+							return
+						}
+						converted := convertChatCompletionsStreamChunkToCompletions(chunk)
+						if converted == nil {
+							continue
+						}
+						select {
+						case <-done:
+							return
+						case convertedChan <- converted:
+						}
+					}
+				}
+			}()
+
+			h.handleStreamResult(c, flusher, func(err error) {
+				stop()
+				cliCancel(err)
+			}, convertedChan, errChan)
 			return
-		case <-time.After(500 * time.Millisecond):
 		}
 	}
 }
 func (h *OpenAIAPIHandler) handleStreamResult(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
-	for {
-		select {
-		case <-c.Request.Context().Done():
-			cancel(c.Request.Context().Err())
-			return
-		case chunk, ok := <-data:
-			if !ok {
-				_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
-				flusher.Flush()
-				cancel(nil)
+	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
+		WriteChunk: func(chunk []byte) {
+			_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(chunk))
+		},
+		WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
+			if errMsg == nil {
 				return
 			}
-			_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(chunk))
-			flusher.Flush()
-		case errMsg, ok := <-errs:
-			if !ok {
-				continue
+			status := http.StatusInternalServerError
+			if errMsg.StatusCode > 0 {
+				status = errMsg.StatusCode
 			}
-			if errMsg != nil {
-				h.WriteErrorResponse(c, errMsg)
-				flusher.Flush()
+			errText := http.StatusText(status)
+			if errMsg.Error != nil && errMsg.Error.Error() != "" {
+				errText = errMsg.Error.Error()
 			}
-			var execErr error
-			if errMsg != nil {
-				execErr = errMsg.Error
-			}
-			cancel(execErr)
-			return
-		case <-time.After(500 * time.Millisecond):
-		}
-	}
+			body := handlers.BuildErrorResponseBody(status, errText)
+			_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(body))
+		},
+		WriteDone: func() {
+			_, _ = fmt.Fprint(c.Writer, "data: [DONE]\n\n")
+		},
+	})
 }
--- a/sdk/api/handlers/openai/openai_responses_handlers.go
+++ b/sdk/api/handlers/openai/openai_responses_handlers.go
@@ -11,7 +11,6 @@ import (
 	"context"
 	"fmt"
 	"net/http"
-	"time"

 	"github.com/gin-gonic/gin"
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
@@ -128,11 +127,6 @@ func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponse(c *gin.Context, r
 //   - c: The Gin context containing the HTTP request and response
 //   - rawJSON: The raw JSON bytes of the OpenAIResponses-compatible request
 func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byte) {
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
 	// Get the http.Flusher interface to manually flush the response.
 	flusher, ok := c.Writer.(http.Flusher)
 	if !ok {
@@ -149,46 +143,88 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ
 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
 	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
-	h.forwardResponsesStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
-	return
-}

-func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
+	setSSEHeaders := func() {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}
+
+	// Peek at the first chunk
 	for {
 		select {
 		case <-c.Request.Context().Done():
-			cancel(c.Request.Context().Err())
+			cliCancel(c.Request.Context().Err())
 			return
-		case chunk, ok := <-data:
+		case errMsg, ok := <-errChan:
 			if !ok {
+				// Err channel closed cleanly; wait for data channel.
+				errChan = nil
+				continue
+			}
+			// Upstream failed immediately. Return proper error status and JSON.
+			h.WriteErrorResponse(c, errMsg)
+			if errMsg != nil {
+				cliCancel(errMsg.Error)
+			} else {
+				cliCancel(nil)
+			}
+			return
+		case chunk, ok := <-dataChan:
+			if !ok {
+				// Stream closed without data? Send headers and done.
+				setSSEHeaders()
 				_, _ = c.Writer.Write([]byte("\n"))
 				flusher.Flush()
-				cancel(nil)
+				cliCancel(nil)
 				return
 			}

+			// Success! Set headers.
+			setSSEHeaders()
+
+			// Write first chunk logic (matching forwardResponsesStream)
 			if bytes.HasPrefix(chunk, []byte("event:")) {
 				_, _ = c.Writer.Write([]byte("\n"))
 			}
 			_, _ = c.Writer.Write(chunk)
 			_, _ = c.Writer.Write([]byte("\n"))
-
 			flusher.Flush()
-		case errMsg, ok := <-errs:
-			if !ok {
-				continue
-			}
-			if errMsg != nil {
-				h.WriteErrorResponse(c, errMsg)
-				flusher.Flush()
-			}
-			var execErr error
-			if errMsg != nil {
-				execErr = errMsg.Error
-			}
-			cancel(execErr)
+
+			// Continue
+			h.forwardResponsesStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
 			return
-		case <-time.After(500 * time.Millisecond):
 		}
 	}
 }
+
+func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
+	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
+		WriteChunk: func(chunk []byte) {
+			if bytes.HasPrefix(chunk, []byte("event:")) {
+				_, _ = c.Writer.Write([]byte("\n"))
+			}
+			_, _ = c.Writer.Write(chunk)
+			_, _ = c.Writer.Write([]byte("\n"))
+		},
+		WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
+			if errMsg == nil {
+				return
+			}
+			status := http.StatusInternalServerError
+			if errMsg.StatusCode > 0 {
+				status = errMsg.StatusCode
+			}
+			errText := http.StatusText(status)
+			if errMsg.Error != nil && errMsg.Error.Error() != "" {
+				errText = errMsg.Error.Error()
+			}
+			body := handlers.BuildErrorResponseBody(status, errText)
+			_, _ = fmt.Fprintf(c.Writer, "\nevent: error\ndata: %s\n\n", string(body))
+		},
+		WriteDone: func() {
+			_, _ = c.Writer.Write([]byte("\n"))
+		},
+	})
+}
--- a/sdk/api/handlers/stream_forwarder.go
+++ b/sdk/api/handlers/stream_forwarder.go
@@ -0,0 +1,121 @@
+package handlers
+
+import (
+	"net/http"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+)
+
+type StreamForwardOptions struct {
+	// KeepAliveInterval overrides the configured streaming keep-alive interval.
+	// If nil, the configured default is used. If set to <= 0, keep-alives are disabled.
+	KeepAliveInterval *time.Duration
+
+	// WriteChunk writes a single data chunk to the response body. It should not flush.
+	WriteChunk func(chunk []byte)
+
+	// WriteTerminalError writes an error payload to the response body when streaming fails
+	// after headers have already been committed. It should not flush.
+	WriteTerminalError func(errMsg *interfaces.ErrorMessage)
+
+	// WriteDone optionally writes a terminal marker when the upstream data channel closes
+	// without an error (e.g. OpenAI's `[DONE]`). It should not flush.
+	WriteDone func()
+
+	// WriteKeepAlive optionally writes a keep-alive heartbeat. It should not flush.
+	// When nil, a standard SSE comment heartbeat is used.
+	WriteKeepAlive func()
+}
+
+func (h *BaseAPIHandler) ForwardStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage, opts StreamForwardOptions) {
+	if c == nil {
+		return
+	}
+	if cancel == nil {
+		return
+	}
+
+	writeChunk := opts.WriteChunk
+	if writeChunk == nil {
+		writeChunk = func([]byte) {}
+	}
+
+	writeKeepAlive := opts.WriteKeepAlive
+	if writeKeepAlive == nil {
+		writeKeepAlive = func() {
+			_, _ = c.Writer.Write([]byte(": keep-alive\n\n"))
+		}
+	}
+
+	keepAliveInterval := StreamingKeepAliveInterval(h.Cfg)
+	if opts.KeepAliveInterval != nil {
+		keepAliveInterval = *opts.KeepAliveInterval
+	}
+	var keepAlive *time.Ticker
+	var keepAliveC <-chan time.Time
+	if keepAliveInterval > 0 {
+		keepAlive = time.NewTicker(keepAliveInterval)
+		defer keepAlive.Stop()
+		keepAliveC = keepAlive.C
+	}
+
+	var terminalErr *interfaces.ErrorMessage
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cancel(c.Request.Context().Err())
+			return
+		case chunk, ok := <-data:
+			if !ok {
+				// Prefer surfacing a terminal error if one is pending.
+				if terminalErr == nil {
+					select {
+					case errMsg, ok := <-errs:
+						if ok && errMsg != nil {
+							terminalErr = errMsg
+						}
+					default:
+					}
+				}
+				if terminalErr != nil {
+					if opts.WriteTerminalError != nil {
+						opts.WriteTerminalError(terminalErr)
+					}
+					flusher.Flush()
+					cancel(terminalErr.Error)
+					return
+				}
+				if opts.WriteDone != nil {
+					opts.WriteDone()
+				}
+				flusher.Flush()
+				cancel(nil)
+				return
+			}
+			writeChunk(chunk)
+			flusher.Flush()
+		case errMsg, ok := <-errs:
+			if !ok {
+				continue
+			}
+			if errMsg != nil {
+				terminalErr = errMsg
+				if opts.WriteTerminalError != nil {
+					opts.WriteTerminalError(errMsg)
+					flusher.Flush()
+				}
+			}
+			var execErr error
+			if errMsg != nil {
+				execErr = errMsg.Error
+			}
+			cancel(execErr)
+			return
+		case <-keepAliveC:
+			writeKeepAlive()
+			flusher.Flush()
+		}
+	}
+}
--- a/sdk/auth/kiro.go
+++ b/sdk/auth/kiro.go
@@ -53,20 +53,8 @@ func (a *KiroAuthenticator) RefreshLead() *time.Duration {
 	return &d
 }

-// Login performs OAuth login for Kiro with AWS Builder ID.
-func (a *KiroAuthenticator) Login(ctx context.Context, cfg *config.Config, opts *LoginOptions) (*coreauth.Auth, error) {
-	if cfg == nil {
-		return nil, fmt.Errorf("kiro auth: configuration is required")
-	}
-
-	oauth := kiroauth.NewKiroOAuth(cfg)
-
-	// Use AWS Builder ID device code flow
-	tokenData, err := oauth.LoginWithBuilderID(ctx)
-	if err != nil {
-		return nil, fmt.Errorf("login failed: %w", err)
-	}
-
+// createAuthRecord creates an auth record from token data.
+func (a *KiroAuthenticator) createAuthRecord(tokenData *kiroauth.KiroTokenData, source string) (*coreauth.Auth, error) {
 	// Parse expires_at
 	expiresAt, err := time.Parse(time.RFC3339, tokenData.ExpiresAt)
 	if err != nil {
@@ -76,34 +64,63 @@ func (a *KiroAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 	// Extract identifier for file naming
 	idPart := extractKiroIdentifier(tokenData.Email, tokenData.ProfileArn)

+	// Determine label based on auth method
+	label := fmt.Sprintf("kiro-%s", source)
+	if tokenData.AuthMethod == "idc" {
+		label = "kiro-idc"
+	}
+
 	now := time.Now()
-	fileName := fmt.Sprintf("kiro-aws-%s.json", idPart)
+	fileName := fmt.Sprintf("%s-%s.json", label, idPart)
+
+	metadata := map[string]any{
+		"type":          "kiro",
+		"access_token":  tokenData.AccessToken,
+		"refresh_token": tokenData.RefreshToken,
+		"profile_arn":   tokenData.ProfileArn,
+		"expires_at":    tokenData.ExpiresAt,
+		"auth_method":   tokenData.AuthMethod,
+		"provider":      tokenData.Provider,
+		"client_id":     tokenData.ClientID,
+		"client_secret": tokenData.ClientSecret,
+		"email":         tokenData.Email,
+	}
+
+	// Add IDC-specific fields if present
+	if tokenData.StartURL != "" {
+		metadata["start_url"] = tokenData.StartURL
+	}
+	if tokenData.Region != "" {
+		metadata["region"] = tokenData.Region
+	}
+
+	attributes := map[string]string{
+		"profile_arn": tokenData.ProfileArn,
+		"source":      source,
+		"email":       tokenData.Email,
+	}
+
+	// Add IDC-specific attributes if present
+	if tokenData.AuthMethod == "idc" {
+		attributes["source"] = "aws-idc"
+		if tokenData.StartURL != "" {
+			attributes["start_url"] = tokenData.StartURL
+		}
+		if tokenData.Region != "" {
+			attributes["region"] = tokenData.Region
+		}
+	}

 	record := &coreauth.Auth{
 		ID:        fileName,
 		Provider:  "kiro",
 		FileName:  fileName,
-		Label:     "kiro-aws",
+		Label:     label,
 		Status:    coreauth.StatusActive,
 		CreatedAt: now,
 		UpdatedAt: now,
-		Metadata: map[string]any{
-			"type":          "kiro",
-			"access_token":  tokenData.AccessToken,
-			"refresh_token": tokenData.RefreshToken,
-			"profile_arn":   tokenData.ProfileArn,
-			"expires_at":    tokenData.ExpiresAt,
-			"auth_method":   tokenData.AuthMethod,
-			"provider":      tokenData.Provider,
-			"client_id":     tokenData.ClientID,
-			"client_secret": tokenData.ClientSecret,
-			"email":         tokenData.Email,
-		},
-		Attributes: map[string]string{
-			"profile_arn": tokenData.ProfileArn,
-			"source":      "aws-builder-id",
-			"email":       tokenData.Email,
-		},
+		Metadata:  metadata,
+		Attributes: attributes,
 		// NextRefreshAfter is aligned with RefreshLead (5min)
 		NextRefreshAfter: expiresAt.Add(-5 * time.Minute),
 	}
@@ -117,6 +134,23 @@ func (a *KiroAuthenticator) Login(ctx context.Context, cfg *config.Config, opts
 	return record, nil
 }

+// Login performs OAuth login for Kiro with AWS (Builder ID or IDC).
+// This shows a method selection prompt and handles both flows.
+func (a *KiroAuthenticator) Login(ctx context.Context, cfg *config.Config, opts *LoginOptions) (*coreauth.Auth, error) {
+	if cfg == nil {
+		return nil, fmt.Errorf("kiro auth: configuration is required")
+	}
+
+	// Use the unified method selection flow (Builder ID or IDC)
+	ssoClient := kiroauth.NewSSOOIDCClient(cfg)
+	tokenData, err := ssoClient.LoginWithMethodSelection(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("login failed: %w", err)
+	}
+
+	return a.createAuthRecord(tokenData, "aws")
+}
+
 // LoginWithAuthCode performs OAuth login for Kiro with AWS Builder ID using authorization code flow.
 // This provides a better UX than device code flow as it uses automatic browser callback.
 func (a *KiroAuthenticator) LoginWithAuthCode(ctx context.Context, cfg *config.Config, opts *LoginOptions) (*coreauth.Auth, error) {
@@ -388,15 +422,23 @@ func (a *KiroAuthenticator) Refresh(ctx context.Context, cfg *config.Config, aut
 	clientID, _ := auth.Metadata["client_id"].(string)
 	clientSecret, _ := auth.Metadata["client_secret"].(string)
 	authMethod, _ := auth.Metadata["auth_method"].(string)
+	startURL, _ := auth.Metadata["start_url"].(string)
+	region, _ := auth.Metadata["region"].(string)

 	var tokenData *kiroauth.KiroTokenData
 	var err error

-	// Use SSO OIDC refresh for AWS Builder ID, otherwise use Kiro's OAuth refresh endpoint
-	if clientID != "" && clientSecret != "" && authMethod == "builder-id" {
-		ssoClient := kiroauth.NewSSOOIDCClient(cfg)
+	ssoClient := kiroauth.NewSSOOIDCClient(cfg)
+
+	// Use SSO OIDC refresh for AWS Builder ID or IDC, otherwise use Kiro's OAuth refresh endpoint
+	switch {
+	case clientID != "" && clientSecret != "" && authMethod == "idc" && region != "":
+		// IDC refresh with region-specific endpoint
+		tokenData, err = ssoClient.RefreshTokenWithRegion(ctx, clientID, clientSecret, refreshToken, region, startURL)
+	case clientID != "" && clientSecret != "" && authMethod == "builder-id":
+		// Builder ID refresh with default endpoint
 		tokenData, err = ssoClient.RefreshToken(ctx, clientID, clientSecret, refreshToken)
-	} else {
+	default:
 		// Fallback to Kiro's refresh endpoint (for social auth: Google/GitHub)
 		oauth := kiroauth.NewKiroOAuth(cfg)
 		tokenData, err = oauth.RefreshToken(ctx, refreshToken)
--- a/sdk/cliproxy/auth/conductor.go
+++ b/sdk/cliproxy/auth/conductor.go
@@ -12,6 +12,7 @@ import (
 	"time"

 	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -389,17 +390,18 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req

 		accountType, accountInfo := auth.AccountInfo()
 		proxyInfo := auth.ProxyInfo()
+		entry := logEntryWithRequestID(ctx)
 		if accountType == "api_key" {
 			if proxyInfo != "" {
-				log.Debugf("Use API key %s for model %s %s", util.HideAPIKey(accountInfo), req.Model, proxyInfo)
+				entry.Debugf("Use API key %s for model %s %s", util.HideAPIKey(accountInfo), req.Model, proxyInfo)
 			} else {
-				log.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
+				entry.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
 			}
 		} else if accountType == "oauth" {
 			if proxyInfo != "" {
-				log.Debugf("Use OAuth %s for model %s %s", accountInfo, req.Model, proxyInfo)
+				entry.Debugf("Use OAuth %s for model %s %s", accountInfo, req.Model, proxyInfo)
 			} else {
-				log.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
+				entry.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
 			}
 		}

@@ -449,17 +451,18 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string,

 		accountType, accountInfo := auth.AccountInfo()
 		proxyInfo := auth.ProxyInfo()
+		entry := logEntryWithRequestID(ctx)
 		if accountType == "api_key" {
 			if proxyInfo != "" {
-				log.Debugf("Use API key %s for model %s %s", util.HideAPIKey(accountInfo), req.Model, proxyInfo)
+				entry.Debugf("Use API key %s for model %s %s", util.HideAPIKey(accountInfo), req.Model, proxyInfo)
 			} else {
-				log.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
+				entry.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
 			}
 		} else if accountType == "oauth" {
 			if proxyInfo != "" {
-				log.Debugf("Use OAuth %s for model %s %s", accountInfo, req.Model, proxyInfo)
+				entry.Debugf("Use OAuth %s for model %s %s", accountInfo, req.Model, proxyInfo)
 			} else {
-				log.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
+				entry.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
 			}
 		}

@@ -509,17 +512,18 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string

 		accountType, accountInfo := auth.AccountInfo()
 		proxyInfo := auth.ProxyInfo()
+		entry := logEntryWithRequestID(ctx)
 		if accountType == "api_key" {
 			if proxyInfo != "" {
-				log.Debugf("Use API key %s for model %s %s", util.HideAPIKey(accountInfo), req.Model, proxyInfo)
+				entry.Debugf("Use API key %s for model %s %s", util.HideAPIKey(accountInfo), req.Model, proxyInfo)
 			} else {
-				log.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
+				entry.Debugf("Use API key %s for model %s", util.HideAPIKey(accountInfo), req.Model)
 			}
 		} else if accountType == "oauth" {
 			if proxyInfo != "" {
-				log.Debugf("Use OAuth %s for model %s %s", accountInfo, req.Model, proxyInfo)
+				entry.Debugf("Use OAuth %s for model %s %s", accountInfo, req.Model, proxyInfo)
 			} else {
-				log.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
+				entry.Debugf("Use OAuth %s for model %s", accountInfo, req.Model)
 			}
 		}

@@ -1606,6 +1610,17 @@ type RequestPreparer interface {
 	PrepareRequest(req *http.Request, auth *Auth) error
 }

+// logEntryWithRequestID returns a logrus entry with request_id field if available in context.
+func logEntryWithRequestID(ctx context.Context) *log.Entry {
+	if ctx == nil {
+		return log.NewEntry(log.StandardLogger())
+	}
+	if reqID := logging.GetRequestID(ctx); reqID != "" {
+		return log.WithField("request_id", reqID)
+	}
+	return log.NewEntry(log.StandardLogger())
+}
+
 // InjectCredentials delegates per-provider HTTP request preparation when supported.
 // If the registered executor for the auth provider implements RequestPreparer,
 // it will be invoked to modify the request (e.g., add headers).
--- a/sdk/config/config.go
+++ b/sdk/config/config.go
@@ -12,6 +12,7 @@ type AccessProvider = internalconfig.AccessProvider

 type Config = internalconfig.Config

+type StreamingConfig = internalconfig.StreamingConfig
 type TLSConfig = internalconfig.TLSConfig
 type RemoteManagement = internalconfig.RemoteManagement
 type AmpCode = internalconfig.AmpCode
Author	SHA1	Message	Date
Luis Pater	7af5a90a0b	Merge pull request #65 from router-for-me/plus v6.6.52	2025-12-24 22:28:45 +08:00
Luis Pater	7551faff79	Merge branch 'main' into plus	2025-12-24 22:27:12 +08:00
Luis Pater	b7409dd2de	Merge pull request #706 from router-for-me/log Log	2025-12-24 22:24:39 +08:00
hkfires	5ba325a8fc	refactor(logging): standardize request id formatting and layout	2025-12-24 22:03:07 +08:00
Luis Pater	d502840f91	Merge pull request #695 from NguyenSiTrung/main feat: add cached token parsing for Gemini , Antigravity API responses	2025-12-24 21:58:55 +08:00
hkfires	99238a4b59	fix(logging): normalize warning level to warn	2025-12-24 21:11:37 +08:00
hkfires	6d43a2ff9a	refactor(logging): inline request id in log output	2025-12-24 21:07:18 +08:00
Luis Pater	cdb9c2e6e8	Merge remote-tracking branch 'origin/main' into router-for-me/main	2025-12-24 20:18:53 +08:00
Luis Pater	3faa1ca9af	Merge pull request #700 from router-for-me/log refactor(sdk/auth): rename manager.go to conductor.go	2025-12-24 19:36:24 +08:00
Luis Pater	9d975e0375	feat(models): add support for GLM-4.7 and MiniMax-M2.1	2025-12-24 19:30:57 +08:00
hkfires	2a6d8b78d4	feat(api): add endpoint to retrieve request logs by ID	2025-12-24 19:24:51 +08:00
Luis Pater	6b80ec79a0	Merge pull request #61 from tinyc0der/fix/kiro-tool-results fix(kiro): Handle tool results correctly in OpenAI format translation	2025-12-24 17:23:41 +08:00
Luis Pater	d3f4783a24	Merge pull request #57 from PancakeZik/my-idc-changes feat: add AWS Identity Center (IDC) authentication support	2025-12-24 17:20:01 +08:00
Luis Pater	1cb6bdbc87	Merge pull request #62 from router-for-me/plus v6.6.50	2025-12-24 17:14:05 +08:00
Luis Pater	96ddfc1f24	Merge branch 'main' into plus	2025-12-24 17:13:51 +08:00
TinyCoder	c169b32570	refactor(kiro): Remove unused variables in OpenAI translator Remove dead code that was never used: - toolCallIDToName map: built but never read from - seenToolCallIDs: declared but never populated, only suppressed with _	2025-12-24 15:10:35 +07:00
TinyCoder	36a512fdf2	fix(kiro): Handle tool results correctly in OpenAI format translation Fix three issues in Kiro OpenAI translator that caused "Improperly formed request" errors when processing LiteLLM-translated requests with tool_use/tool_result: 1. Skip merging tool role messages in MergeAdjacentMessages() to preserve individual tool_call_id fields 2. Track pendingToolResults and attach to the next user message instead of only the last message. Create synthetic user message when conversation ends with tool results. 3. Insert synthetic user message with tool results before assistant messages to maintain proper alternating user/assistant structure. This fixes the case where LiteLLM translates Anthropic user messages containing only tool_result blocks into tool role messages followed by assistant. Adds unit tests covering all tool result handling scenarios.	2025-12-24 15:10:35 +07:00
hkfires	26fbb77901	refactor(sdk/auth): rename manager.go to conductor.go	2025-12-24 15:21:03 +08:00
NguyenSiTrung	a277302262	Merge remote-tracking branch 'upstream/main'	2025-12-24 10:54:09 +07:00
NguyenSiTrung	969c1a5b72	refactor: extract parseGeminiFamilyUsageDetail helper to reduce duplication	2025-12-24 10:22:31 +07:00
NguyenSiTrung	872339bceb	feat: add cached token parsing for Gemini API responses	2025-12-24 10:20:11 +07:00
Luis Pater	5dc0dbc7aa	Merge pull request #697 from Cubence-com/main docs(readme): add Cubence sponsor and fix PackyCode link	2025-12-24 11:19:32 +08:00
Luis Pater	ee6fc4e8a1	Merge pull request #60 from router-for-me/pr-59-resolve-conflicts v6.6.50（解决 #59 冲突）	2025-12-24 11:12:09 +08:00
Luis Pater	8fee16aecd	Merge PR #59 : v6.6.50 (resolve conflicts)	2025-12-24 11:06:10 +08:00
Luis Pater	2b7ba54a2f	Merge pull request #688 from router-for-me/feature/request-id-tracking feat(logging): implement request ID tracking and propagation	2025-12-24 10:54:13 +08:00
hkfires	007c3304f2	feat(logging): scope request ID tracking to AI API endpoints	2025-12-24 09:17:09 +08:00
hkfires	e76ba0ede9	feat(logging): implement request ID tracking and propagation	2025-12-24 08:32:17 +08:00
Luis Pater	c06ac07e23	Merge pull request #686 from ajkdrag/main feat: regex support for model-mappings	2025-12-24 04:37:44 +08:00
Luis Pater	e592a57458	Merge branch 'router-for-me:main' into main	2025-12-24 04:25:06 +08:00
Luis Pater	66769ec657	fix(translators): update role from `tool` to `user` in Gemini and Gemini-CLI requests	2025-12-24 04:24:07 +08:00
Luis Pater	f413feec61	refactor(handlers): streamline error and data channel handling in streaming logic Improved consistency across OpenAI, Claude, and Gemini handlers by replacing initial `select` statement with a `for` loop for better readability and error-handling robustness.	2025-12-24 04:07:24 +08:00
Luis Pater	2e538e3486	Merge pull request #661 from jroth1111/fix/streaming-bootstrap-forwarder fix: improve streaming bootstrap and forwarding	2025-12-24 03:51:40 +08:00
Luis Pater	9617a7b0d6	Merge pull request #621 from dacsang97/fix/antigravity-prompt-caching Antigravity Prompt Caching Fix	2025-12-24 03:50:25 +08:00
Luis Pater	7569320770	Merge branch 'dev' into fix/antigravity-prompt-caching	2025-12-24 03:49:46 +08:00
Fetters	8d25cf0d75	fix(readme): update PackyCode sponsorship link and remove redundant tbody	2025-12-23 23:44:40 +08:00
Fetters	64e85e7019	docs(readme): add Cubence sponsor	2025-12-23 23:30:57 +08:00
altamash	0c0aae1eac	Robust change detection: replaced string concat with struct-based compare in hasModelMappingsChanged; removed boolTo01. • Performance: pre-allocate map and regex slice capacities in UpdateMappings. • Verified with amp module tests (all passing)	2025-12-23 18:52:28 +05:30
altamash	5dcf7cb846	feat: regex support for model-mappings	2025-12-23 18:41:58 +05:30
Joao	349b2ba3af	refactor: improve error handling and code quality - Handle errors in promptInput instead of ignoring them - Improve promptSelect to provide feedback on invalid input and re-prompt - Use sentinel errors (ErrAuthorizationPending, ErrSlowDown) instead of string-based error checking with strings.Contains - Move hardcoded x-amz-user-agent header to idcAmzUserAgent constant Addresses code review feedback from Gemini Code Assist.	2025-12-23 10:20:14 +00:00
Joao	98db5aabd0	feat: persist refreshed IDC tokens to auth file Add persistRefreshedAuth function to write refreshed tokens back to the auth file after inline token refresh. This prevents repeated token refreshes on every request when the token expires. Changes: - Add persistRefreshedAuth() to kiro_executor.go - Call persist after all token refresh paths (401, 403, pre-request) - Remove unused log import from sdk/auth/kiro.go	2025-12-23 10:00:14 +00:00
Joao	7fd98f3556	feat: add IDC auth support with Kiro IDE headers	2025-12-23 08:18:10 +00:00
gwizz	5bf89dd757	fix: keep streaming defaults legacy-safe	2025-12-23 00:53:18 +11:00
gwizz	4442574e53	fix: stop streaming loop on context cancel	2025-12-23 00:37:55 +11:00
gwizz	71a6dffbb6	fix: improve streaming bootstrap and forwarding	2025-12-22 23:34:23 +11:00
Evan Nguyen	24e8e20b59	Merge branch 'main' into fix/antigravity-prompt-caching	2025-12-21 19:43:24 +07:00
Evan Nguyen	a87f09bad2	feat(antigravity): add session ID generation and mutex for random source	2025-12-21 17:50:41 +07:00
evann	bc6c4cdbfc	feat(antigravity): add logging for cached token setting errors in responses	2025-12-19 16:49:50 +07:00
evann	404546ce93	refactor(antigravity): regarding production endpoint caching	2025-12-19 16:36:54 +07:00
evann	6dd1cf1dd6	Merge branch 'main' into fix/antigravity-prompt-caching	2025-12-19 16:34:28 +07:00
evann	9058d406a3	feat(antigravity): enhance prompt caching support and update agent version	2025-12-19 16:33:41 +07:00