mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-23 22:42:39 +00:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b80c2aabb0 | ||
|
|
f0a3eb574e | ||
|
|
14ce6aebd1 | ||
|
|
2fe83723f2 | ||
|
|
e73b9e10a6 | ||
|
|
9c04c18c04 | ||
|
|
81ae09d0ec | ||
|
|
01cf221167 | ||
|
|
cd8c86c6fb | ||
|
|
52d5fd1a67 | ||
|
|
07d21463ca |
@@ -29,17 +29,79 @@ func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRe
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const maxBufferedResponseBytes = 2 * 1024 * 1024 // 2MB safety cap
|
||||||
|
|
||||||
|
func looksLikeSSEChunk(data []byte) bool {
|
||||||
|
// Fallback detection: some upstreams may omit/lie about Content-Type, causing SSE to be buffered.
|
||||||
|
// Heuristics are intentionally simple and cheap.
|
||||||
|
return bytes.Contains(data, []byte("data:")) ||
|
||||||
|
bytes.Contains(data, []byte("event:")) ||
|
||||||
|
bytes.Contains(data, []byte("message_start")) ||
|
||||||
|
bytes.Contains(data, []byte("message_delta")) ||
|
||||||
|
bytes.Contains(data, []byte("content_block_start")) ||
|
||||||
|
bytes.Contains(data, []byte("content_block_delta")) ||
|
||||||
|
bytes.Contains(data, []byte("content_block_stop")) ||
|
||||||
|
bytes.Contains(data, []byte("\n\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rw *ResponseRewriter) enableStreaming(reason string) error {
|
||||||
|
if rw.isStreaming {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
rw.isStreaming = true
|
||||||
|
|
||||||
|
// Flush any previously buffered data to avoid reordering or data loss.
|
||||||
|
if rw.body != nil && rw.body.Len() > 0 {
|
||||||
|
buf := rw.body.Bytes()
|
||||||
|
// Copy before Reset() to keep bytes stable.
|
||||||
|
toFlush := make([]byte, len(buf))
|
||||||
|
copy(toFlush, buf)
|
||||||
|
rw.body.Reset()
|
||||||
|
|
||||||
|
if _, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(toFlush)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
|
||||||
|
flusher.Flush()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("amp response rewriter: switched to streaming (%s)", reason)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Write intercepts response writes and buffers them for model name replacement
|
// Write intercepts response writes and buffers them for model name replacement
|
||||||
func (rw *ResponseRewriter) Write(data []byte) (int, error) {
|
func (rw *ResponseRewriter) Write(data []byte) (int, error) {
|
||||||
// Detect streaming on first write
|
// Detect streaming on first write (header-based)
|
||||||
if rw.body.Len() == 0 && !rw.isStreaming {
|
if !rw.isStreaming && rw.body.Len() == 0 {
|
||||||
contentType := rw.Header().Get("Content-Type")
|
contentType := rw.Header().Get("Content-Type")
|
||||||
rw.isStreaming = strings.Contains(contentType, "text/event-stream") ||
|
rw.isStreaming = strings.Contains(contentType, "text/event-stream") ||
|
||||||
strings.Contains(contentType, "stream")
|
strings.Contains(contentType, "stream")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !rw.isStreaming {
|
||||||
|
// Content-based fallback: detect SSE-like chunks even if Content-Type is missing/wrong.
|
||||||
|
if looksLikeSSEChunk(data) {
|
||||||
|
if err := rw.enableStreaming("sse heuristic"); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
} else if rw.body.Len()+len(data) > maxBufferedResponseBytes {
|
||||||
|
// Safety cap: avoid unbounded buffering on large responses.
|
||||||
|
log.Warnf("amp response rewriter: buffer exceeded %d bytes, switching to streaming", maxBufferedResponseBytes)
|
||||||
|
if err := rw.enableStreaming("buffer limit"); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if rw.isStreaming {
|
if rw.isStreaming {
|
||||||
return rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
|
n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
|
||||||
|
if err == nil {
|
||||||
|
if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
|
||||||
|
flusher.Flush()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
}
|
}
|
||||||
return rw.body.Write(data)
|
return rw.body.Write(data)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -648,8 +648,9 @@ func GetIFlowModels() []*ModelInfo {
|
|||||||
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
|
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
|
||||||
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
|
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
|
||||||
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||||
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
|
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
|
||||||
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
|
{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
|
||||||
|
{ID: "deepseek-v3.2-exp", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
|
||||||
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
|
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
|
||||||
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
|
||||||
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
|
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -784,20 +786,45 @@ func parseRetryDelay(errorBody []byte) (*time.Duration, error) {
|
|||||||
// Try to parse the retryDelay from the error response
|
// Try to parse the retryDelay from the error response
|
||||||
// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
|
// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
|
||||||
details := gjson.GetBytes(errorBody, "error.details")
|
details := gjson.GetBytes(errorBody, "error.details")
|
||||||
if !details.Exists() || !details.IsArray() {
|
if details.Exists() && details.IsArray() {
|
||||||
return nil, fmt.Errorf("no error.details found")
|
for _, detail := range details.Array() {
|
||||||
|
typeVal := detail.Get("@type").String()
|
||||||
|
if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
|
||||||
|
retryDelay := detail.Get("retryDelay").String()
|
||||||
|
if retryDelay != "" {
|
||||||
|
// Parse duration string like "0.847655010s"
|
||||||
|
duration, err := time.ParseDuration(retryDelay)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse duration")
|
||||||
|
}
|
||||||
|
return &duration, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: try ErrorInfo.metadata.quotaResetDelay (e.g., "373.801628ms")
|
||||||
|
for _, detail := range details.Array() {
|
||||||
|
typeVal := detail.Get("@type").String()
|
||||||
|
if typeVal == "type.googleapis.com/google.rpc.ErrorInfo" {
|
||||||
|
quotaResetDelay := detail.Get("metadata.quotaResetDelay").String()
|
||||||
|
if quotaResetDelay != "" {
|
||||||
|
duration, err := time.ParseDuration(quotaResetDelay)
|
||||||
|
if err == nil {
|
||||||
|
return &duration, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, detail := range details.Array() {
|
// Fallback: parse from error.message "Your quota will reset after Xs."
|
||||||
typeVal := detail.Get("@type").String()
|
message := gjson.GetBytes(errorBody, "error.message").String()
|
||||||
if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
|
if message != "" {
|
||||||
retryDelay := detail.Get("retryDelay").String()
|
re := regexp.MustCompile(`after\s+(\d+)s\.?`)
|
||||||
if retryDelay != "" {
|
if matches := re.FindStringSubmatch(message); len(matches) > 1 {
|
||||||
// Parse duration string like "0.847655010s"
|
seconds, err := strconv.Atoi(matches[1])
|
||||||
duration, err := time.ParseDuration(retryDelay)
|
if err == nil {
|
||||||
if err != nil {
|
duration := time.Duration(seconds) * time.Second
|
||||||
return nil, fmt.Errorf("failed to parse duration")
|
|
||||||
}
|
|
||||||
return &duration, nil
|
return &duration, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -35,5 +35,5 @@ import (
|
|||||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/openai/responses"
|
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/openai/responses"
|
||||||
|
|
||||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/claude"
|
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/claude"
|
||||||
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/openai/chat-completions"
|
_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/openai"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
// Package claude provides translation between Kiro and Claude formats.
|
||||||
package claude
|
package claude
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -12,8 +13,8 @@ func init() {
|
|||||||
Kiro,
|
Kiro,
|
||||||
ConvertClaudeRequestToKiro,
|
ConvertClaudeRequestToKiro,
|
||||||
interfaces.TranslateResponse{
|
interfaces.TranslateResponse{
|
||||||
Stream: ConvertKiroResponseToClaude,
|
Stream: ConvertKiroStreamToClaude,
|
||||||
NonStream: ConvertKiroResponseToClaudeNonStream,
|
NonStream: ConvertKiroNonStreamToClaude,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,27 +1,21 @@
|
|||||||
// Package claude provides translation between Kiro and Claude formats.
|
// Package claude provides translation between Kiro and Claude formats.
|
||||||
// Since Kiro executor generates Claude-compatible SSE format internally (with event: prefix),
|
// Since Kiro executor generates Claude-compatible SSE format internally (with event: prefix),
|
||||||
// translations are pass-through.
|
// translations are pass-through for streaming, but responses need proper formatting.
|
||||||
package claude
|
package claude
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"context"
|
"context"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ConvertClaudeRequestToKiro converts Claude request to Kiro format.
|
// ConvertKiroStreamToClaude converts Kiro streaming response to Claude format.
|
||||||
// Since Kiro uses Claude format internally, this is mostly a pass-through.
|
|
||||||
func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bool) []byte {
|
|
||||||
return bytes.Clone(inputRawJSON)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ConvertKiroResponseToClaude converts Kiro streaming response to Claude format.
|
|
||||||
// Kiro executor already generates complete SSE format with "event:" prefix,
|
// Kiro executor already generates complete SSE format with "event:" prefix,
|
||||||
// so this is a simple pass-through.
|
// so this is a simple pass-through.
|
||||||
func ConvertKiroResponseToClaude(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string {
|
func ConvertKiroStreamToClaude(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string {
|
||||||
return []string{string(rawResponse)}
|
return []string{string(rawResponse)}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConvertKiroResponseToClaudeNonStream converts Kiro non-streaming response to Claude format.
|
// ConvertKiroNonStreamToClaude converts Kiro non-streaming response to Claude format.
|
||||||
func ConvertKiroResponseToClaudeNonStream(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) string {
|
// The response is already in Claude format, so this is a pass-through.
|
||||||
|
func ConvertKiroNonStreamToClaude(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) string {
|
||||||
return string(rawResponse)
|
return string(rawResponse)
|
||||||
}
|
}
|
||||||
|
|||||||
773
internal/translator/kiro/claude/kiro_claude_request.go
Normal file
773
internal/translator/kiro/claude/kiro_claude_request.go
Normal file
@@ -0,0 +1,773 @@
|
|||||||
|
// Package claude provides request translation functionality for Claude API to Kiro format.
|
||||||
|
// It handles parsing and transforming Claude API requests into the Kiro/Amazon Q API format,
|
||||||
|
// extracting model information, system instructions, message contents, and tool declarations.
|
||||||
|
package claude
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
// Kiro API request structs - field order determines JSON key order
|
||||||
|
|
||||||
|
// KiroPayload is the top-level request structure for Kiro API
|
||||||
|
type KiroPayload struct {
|
||||||
|
ConversationState KiroConversationState `json:"conversationState"`
|
||||||
|
ProfileArn string `json:"profileArn,omitempty"`
|
||||||
|
InferenceConfig *KiroInferenceConfig `json:"inferenceConfig,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroInferenceConfig contains inference parameters for the Kiro API.
|
||||||
|
type KiroInferenceConfig struct {
|
||||||
|
MaxTokens int `json:"maxTokens,omitempty"`
|
||||||
|
Temperature float64 `json:"temperature,omitempty"`
|
||||||
|
TopP float64 `json:"topP,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroConversationState holds the conversation context
|
||||||
|
type KiroConversationState struct {
|
||||||
|
ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL" - must be first field
|
||||||
|
ConversationID string `json:"conversationId"`
|
||||||
|
CurrentMessage KiroCurrentMessage `json:"currentMessage"`
|
||||||
|
History []KiroHistoryMessage `json:"history,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroCurrentMessage wraps the current user message
|
||||||
|
type KiroCurrentMessage struct {
|
||||||
|
UserInputMessage KiroUserInputMessage `json:"userInputMessage"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroHistoryMessage represents a message in the conversation history
|
||||||
|
type KiroHistoryMessage struct {
|
||||||
|
UserInputMessage *KiroUserInputMessage `json:"userInputMessage,omitempty"`
|
||||||
|
AssistantResponseMessage *KiroAssistantResponseMessage `json:"assistantResponseMessage,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroImage represents an image in Kiro API format
|
||||||
|
type KiroImage struct {
|
||||||
|
Format string `json:"format"`
|
||||||
|
Source KiroImageSource `json:"source"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroImageSource contains the image data
|
||||||
|
type KiroImageSource struct {
|
||||||
|
Bytes string `json:"bytes"` // base64 encoded image data
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroUserInputMessage represents a user message
|
||||||
|
type KiroUserInputMessage struct {
|
||||||
|
Content string `json:"content"`
|
||||||
|
ModelID string `json:"modelId"`
|
||||||
|
Origin string `json:"origin"`
|
||||||
|
Images []KiroImage `json:"images,omitempty"`
|
||||||
|
UserInputMessageContext *KiroUserInputMessageContext `json:"userInputMessageContext,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroUserInputMessageContext contains tool-related context
|
||||||
|
type KiroUserInputMessageContext struct {
|
||||||
|
ToolResults []KiroToolResult `json:"toolResults,omitempty"`
|
||||||
|
Tools []KiroToolWrapper `json:"tools,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroToolResult represents a tool execution result
|
||||||
|
type KiroToolResult struct {
|
||||||
|
Content []KiroTextContent `json:"content"`
|
||||||
|
Status string `json:"status"`
|
||||||
|
ToolUseID string `json:"toolUseId"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroTextContent represents text content
|
||||||
|
type KiroTextContent struct {
|
||||||
|
Text string `json:"text"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroToolWrapper wraps a tool specification
|
||||||
|
type KiroToolWrapper struct {
|
||||||
|
ToolSpecification KiroToolSpecification `json:"toolSpecification"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroToolSpecification defines a tool's schema
|
||||||
|
type KiroToolSpecification struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Description string `json:"description"`
|
||||||
|
InputSchema KiroInputSchema `json:"inputSchema"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroInputSchema wraps the JSON schema for tool input
|
||||||
|
type KiroInputSchema struct {
|
||||||
|
JSON interface{} `json:"json"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroAssistantResponseMessage represents an assistant message
|
||||||
|
type KiroAssistantResponseMessage struct {
|
||||||
|
Content string `json:"content"`
|
||||||
|
ToolUses []KiroToolUse `json:"toolUses,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// KiroToolUse represents a tool invocation by the assistant
|
||||||
|
type KiroToolUse struct {
|
||||||
|
ToolUseID string `json:"toolUseId"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Input map[string]interface{} `json:"input"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ConvertClaudeRequestToKiro converts a Claude API request to Kiro format.
|
||||||
|
// This is the main entry point for request translation.
|
||||||
|
func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bool) []byte {
|
||||||
|
// For Kiro, we pass through the Claude format since buildKiroPayload
|
||||||
|
// expects Claude format and does the conversion internally.
|
||||||
|
// The actual conversion happens in the executor when building the HTTP request.
|
||||||
|
return inputRawJSON
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildKiroPayload constructs the Kiro API request payload from Claude format.
|
||||||
|
// Supports tool calling - tools are passed via userInputMessageContext.
|
||||||
|
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
|
||||||
|
// isAgentic parameter enables chunked write optimization prompt for -agentic model variants.
|
||||||
|
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
|
||||||
|
// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint.
|
||||||
|
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
|
||||||
|
// Extract max_tokens for potential use in inferenceConfig
|
||||||
|
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
|
||||||
|
const kiroMaxOutputTokens = 32000
|
||||||
|
var maxTokens int64
|
||||||
|
if mt := gjson.GetBytes(claudeBody, "max_tokens"); mt.Exists() {
|
||||||
|
maxTokens = mt.Int()
|
||||||
|
if maxTokens == -1 {
|
||||||
|
maxTokens = kiroMaxOutputTokens
|
||||||
|
log.Debugf("kiro: max_tokens=-1 converted to %d", kiroMaxOutputTokens)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract temperature if specified
|
||||||
|
var temperature float64
|
||||||
|
var hasTemperature bool
|
||||||
|
if temp := gjson.GetBytes(claudeBody, "temperature"); temp.Exists() {
|
||||||
|
temperature = temp.Float()
|
||||||
|
hasTemperature = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract top_p if specified
|
||||||
|
var topP float64
|
||||||
|
var hasTopP bool
|
||||||
|
if tp := gjson.GetBytes(claudeBody, "top_p"); tp.Exists() {
|
||||||
|
topP = tp.Float()
|
||||||
|
hasTopP = true
|
||||||
|
log.Debugf("kiro: extracted top_p: %.2f", topP)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize origin value for Kiro API compatibility
|
||||||
|
origin = normalizeOrigin(origin)
|
||||||
|
log.Debugf("kiro: normalized origin value: %s", origin)
|
||||||
|
|
||||||
|
messages := gjson.GetBytes(claudeBody, "messages")
|
||||||
|
|
||||||
|
// For chat-only mode, don't include tools
|
||||||
|
var tools gjson.Result
|
||||||
|
if !isChatOnly {
|
||||||
|
tools = gjson.GetBytes(claudeBody, "tools")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract system prompt
|
||||||
|
systemPrompt := extractSystemPrompt(claudeBody)
|
||||||
|
|
||||||
|
// Check for thinking mode using the comprehensive IsThinkingEnabled function
|
||||||
|
// This supports Claude API format, OpenAI reasoning_effort, and AMP/Cursor format
|
||||||
|
thinkingEnabled := IsThinkingEnabled(claudeBody)
|
||||||
|
_, budgetTokens := checkThinkingMode(claudeBody) // Get budget tokens from Claude format if available
|
||||||
|
if budgetTokens <= 0 {
|
||||||
|
// Calculate budgetTokens based on max_tokens if available
|
||||||
|
// Use 50% of max_tokens for thinking, with min 8000 and max 24000
|
||||||
|
if maxTokens > 0 {
|
||||||
|
budgetTokens = maxTokens / 2
|
||||||
|
if budgetTokens < 8000 {
|
||||||
|
budgetTokens = 8000
|
||||||
|
}
|
||||||
|
if budgetTokens > 24000 {
|
||||||
|
budgetTokens = 24000
|
||||||
|
}
|
||||||
|
log.Debugf("kiro: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
|
||||||
|
} else {
|
||||||
|
budgetTokens = 16000 // Default budget tokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inject timestamp context
|
||||||
|
timestamp := time.Now().Format("2006-01-02 15:04:05 MST")
|
||||||
|
timestampContext := fmt.Sprintf("[Context: Current time is %s]", timestamp)
|
||||||
|
if systemPrompt != "" {
|
||||||
|
systemPrompt = timestampContext + "\n\n" + systemPrompt
|
||||||
|
} else {
|
||||||
|
systemPrompt = timestampContext
|
||||||
|
}
|
||||||
|
log.Debugf("kiro: injected timestamp context: %s", timestamp)
|
||||||
|
|
||||||
|
// Inject agentic optimization prompt for -agentic model variants
|
||||||
|
if isAgentic {
|
||||||
|
if systemPrompt != "" {
|
||||||
|
systemPrompt += "\n"
|
||||||
|
}
|
||||||
|
systemPrompt += kirocommon.KiroAgenticSystemPrompt
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle tool_choice parameter - Kiro doesn't support it natively, so we inject system prompt hints
|
||||||
|
// Claude tool_choice values: {"type": "auto/any/tool", "name": "..."}
|
||||||
|
toolChoiceHint := extractClaudeToolChoiceHint(claudeBody)
|
||||||
|
if toolChoiceHint != "" {
|
||||||
|
if systemPrompt != "" {
|
||||||
|
systemPrompt += "\n"
|
||||||
|
}
|
||||||
|
systemPrompt += toolChoiceHint
|
||||||
|
log.Debugf("kiro: injected tool_choice hint into system prompt")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inject thinking hint when thinking mode is enabled
|
||||||
|
if thinkingEnabled {
|
||||||
|
if systemPrompt != "" {
|
||||||
|
systemPrompt += "\n"
|
||||||
|
}
|
||||||
|
dynamicThinkingHint := fmt.Sprintf("<thinking_mode>interleaved</thinking_mode><max_thinking_length>%d</max_thinking_length>", budgetTokens)
|
||||||
|
systemPrompt += dynamicThinkingHint
|
||||||
|
log.Debugf("kiro: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert Claude tools to Kiro format
|
||||||
|
kiroTools := convertClaudeToolsToKiro(tools)
|
||||||
|
|
||||||
|
// Process messages and build history
|
||||||
|
history, currentUserMsg, currentToolResults := processMessages(messages, modelID, origin)
|
||||||
|
|
||||||
|
// Build content with system prompt
|
||||||
|
if currentUserMsg != nil {
|
||||||
|
currentUserMsg.Content = buildFinalContent(currentUserMsg.Content, systemPrompt, currentToolResults)
|
||||||
|
|
||||||
|
// Deduplicate currentToolResults
|
||||||
|
currentToolResults = deduplicateToolResults(currentToolResults)
|
||||||
|
|
||||||
|
// Build userInputMessageContext with tools and tool results
|
||||||
|
if len(kiroTools) > 0 || len(currentToolResults) > 0 {
|
||||||
|
currentUserMsg.UserInputMessageContext = &KiroUserInputMessageContext{
|
||||||
|
Tools: kiroTools,
|
||||||
|
ToolResults: currentToolResults,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build payload
|
||||||
|
var currentMessage KiroCurrentMessage
|
||||||
|
if currentUserMsg != nil {
|
||||||
|
currentMessage = KiroCurrentMessage{UserInputMessage: *currentUserMsg}
|
||||||
|
} else {
|
||||||
|
fallbackContent := ""
|
||||||
|
if systemPrompt != "" {
|
||||||
|
fallbackContent = "--- SYSTEM PROMPT ---\n" + systemPrompt + "\n--- END SYSTEM PROMPT ---\n"
|
||||||
|
}
|
||||||
|
currentMessage = KiroCurrentMessage{UserInputMessage: KiroUserInputMessage{
|
||||||
|
Content: fallbackContent,
|
||||||
|
ModelID: modelID,
|
||||||
|
Origin: origin,
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build inferenceConfig if we have any inference parameters
|
||||||
|
var inferenceConfig *KiroInferenceConfig
|
||||||
|
if maxTokens > 0 || hasTemperature || hasTopP {
|
||||||
|
inferenceConfig = &KiroInferenceConfig{}
|
||||||
|
if maxTokens > 0 {
|
||||||
|
inferenceConfig.MaxTokens = int(maxTokens)
|
||||||
|
}
|
||||||
|
if hasTemperature {
|
||||||
|
inferenceConfig.Temperature = temperature
|
||||||
|
}
|
||||||
|
if hasTopP {
|
||||||
|
inferenceConfig.TopP = topP
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
payload := KiroPayload{
|
||||||
|
ConversationState: KiroConversationState{
|
||||||
|
ChatTriggerType: "MANUAL",
|
||||||
|
ConversationID: uuid.New().String(),
|
||||||
|
CurrentMessage: currentMessage,
|
||||||
|
History: history,
|
||||||
|
},
|
||||||
|
ProfileArn: profileArn,
|
||||||
|
InferenceConfig: inferenceConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := json.Marshal(payload)
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("kiro: failed to marshal payload: %v", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeOrigin normalizes origin value for Kiro API compatibility
|
||||||
|
func normalizeOrigin(origin string) string {
|
||||||
|
switch origin {
|
||||||
|
case "KIRO_CLI":
|
||||||
|
return "CLI"
|
||||||
|
case "KIRO_AI_EDITOR":
|
||||||
|
return "AI_EDITOR"
|
||||||
|
case "AMAZON_Q":
|
||||||
|
return "CLI"
|
||||||
|
case "KIRO_IDE":
|
||||||
|
return "AI_EDITOR"
|
||||||
|
default:
|
||||||
|
return origin
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractSystemPrompt extracts system prompt from Claude request
|
||||||
|
func extractSystemPrompt(claudeBody []byte) string {
|
||||||
|
systemField := gjson.GetBytes(claudeBody, "system")
|
||||||
|
if systemField.IsArray() {
|
||||||
|
var sb strings.Builder
|
||||||
|
for _, block := range systemField.Array() {
|
||||||
|
if block.Get("type").String() == "text" {
|
||||||
|
sb.WriteString(block.Get("text").String())
|
||||||
|
} else if block.Type == gjson.String {
|
||||||
|
sb.WriteString(block.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
return systemField.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkThinkingMode checks if thinking mode is enabled in the Claude request
|
||||||
|
func checkThinkingMode(claudeBody []byte) (bool, int64) {
|
||||||
|
thinkingEnabled := false
|
||||||
|
var budgetTokens int64 = 16000
|
||||||
|
|
||||||
|
thinkingField := gjson.GetBytes(claudeBody, "thinking")
|
||||||
|
if thinkingField.Exists() {
|
||||||
|
thinkingType := thinkingField.Get("type").String()
|
||||||
|
if thinkingType == "enabled" {
|
||||||
|
thinkingEnabled = true
|
||||||
|
if bt := thinkingField.Get("budget_tokens"); bt.Exists() {
|
||||||
|
budgetTokens = bt.Int()
|
||||||
|
if budgetTokens <= 0 {
|
||||||
|
thinkingEnabled = false
|
||||||
|
log.Debugf("kiro: thinking mode disabled via budget_tokens <= 0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if thinkingEnabled {
|
||||||
|
log.Debugf("kiro: thinking mode enabled via Claude API parameter, budget_tokens: %d", budgetTokens)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return thinkingEnabled, budgetTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsThinkingEnabled is a public wrapper to check if thinking mode is enabled.
|
||||||
|
// This is used by the executor to determine whether to parse <thinking> tags in responses.
|
||||||
|
// When thinking is NOT enabled in the request, <thinking> tags in responses should be
|
||||||
|
// treated as regular text content, not as thinking blocks.
|
||||||
|
//
|
||||||
|
// Supports multiple formats:
|
||||||
|
// - Claude API format: thinking.type = "enabled"
|
||||||
|
// - OpenAI format: reasoning_effort parameter
|
||||||
|
// - AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
|
||||||
|
// IsThinkingEnabled reports whether the incoming request asks for extended
// "thinking" output. Detection is tried in priority order:
//  1. Claude-native format (thinking.type == "enabled", via checkThinkingMode)
//  2. OpenAI reasoning_effort parameter
//  3. AMP/Cursor <thinking_mode>...</thinking_mode> tag embedded in the body
//  4. max_completion_tokens combined with a "thinking"/"reason" model-name hint
func IsThinkingEnabled(body []byte) bool {
	// Check Claude API format first (thinking.type = "enabled")
	enabled, _ := checkThinkingMode(body)
	if enabled {
		log.Debugf("kiro: IsThinkingEnabled returning true (Claude API format)")
		return true
	}

	// Check OpenAI format: reasoning_effort parameter
	// Valid values: "low", "medium", "high", "auto" (not "none")
	reasoningEffort := gjson.GetBytes(body, "reasoning_effort")
	if reasoningEffort.Exists() {
		effort := reasoningEffort.String()
		if effort != "" && effort != "none" {
			log.Debugf("kiro: thinking mode enabled via OpenAI reasoning_effort: %s", effort)
			return true
		}
	}

	// Check AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
	// This is how AMP client passes thinking configuration.
	// NOTE: the scan is over the raw JSON body, so the tag is found wherever
	// it appears, not only in the system prompt field.
	bodyStr := string(body)
	if strings.Contains(bodyStr, "<thinking_mode>") && strings.Contains(bodyStr, "</thinking_mode>") {
		// Extract thinking mode value
		startTag := "<thinking_mode>"
		endTag := "</thinking_mode>"
		startIdx := strings.Index(bodyStr, startTag)
		if startIdx >= 0 {
			startIdx += len(startTag)
			// endIdx is relative to the slice starting at startIdx, so the
			// tag value is bodyStr[startIdx : startIdx+endIdx].
			endIdx := strings.Index(bodyStr[startIdx:], endTag)
			if endIdx >= 0 {
				thinkingMode := bodyStr[startIdx : startIdx+endIdx]
				if thinkingMode == "interleaved" || thinkingMode == "enabled" {
					log.Debugf("kiro: thinking mode enabled via AMP/Cursor format: %s", thinkingMode)
					return true
				}
			}
		}
	}

	// Check OpenAI format: max_completion_tokens with reasoning (o1-style)
	// Some clients use this to indicate reasoning mode
	if gjson.GetBytes(body, "max_completion_tokens").Exists() {
		// If max_completion_tokens is set, check if model name suggests reasoning
		model := gjson.GetBytes(body, "model").String()
		if strings.Contains(strings.ToLower(model), "thinking") ||
			strings.Contains(strings.ToLower(model), "reason") {
			log.Debugf("kiro: thinking mode enabled via model name hint: %s", model)
			return true
		}
	}

	log.Debugf("kiro: IsThinkingEnabled returning false (no thinking mode detected)")
	return false
}
|
||||||
|
|
||||||
|
// shortenToolNameIfNeeded shortens tool names that exceed 64 bytes.
// MCP tools often have long names like "mcp__server-name__tool-name".
// This preserves the "mcp__" prefix and last segment when possible.
//
// Fix: truncation now backs up to a UTF-8 rune boundary so a multi-byte
// character is never cut in half, matching the rune-safe description
// truncation used elsewhere in this package. ASCII names are unaffected.
func shortenToolNameIfNeeded(name string) string {
	const limit = 64
	if len(name) <= limit {
		return name
	}
	// For MCP tools, try to preserve prefix and last segment.
	if strings.HasPrefix(name, "mcp__") {
		idx := strings.LastIndex(name, "__")
		if idx > 0 {
			cand := "mcp__" + name[idx+2:]
			if len(cand) > limit {
				return truncateAtRuneBoundary(cand, limit)
			}
			return cand
		}
	}
	return truncateAtRuneBoundary(name, limit)
}

// truncateAtRuneBoundary cuts s to at most max bytes without splitting a
// multi-byte UTF-8 rune: the cut point moves left until it lands on a byte
// that starts a rune.
func truncateAtRuneBoundary(s string, max int) string {
	if len(s) <= max {
		return s
	}
	cut := max
	for cut > 0 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut]
}
|
||||||
|
|
||||||
|
// convertClaudeToolsToKiro converts Claude tools to Kiro format
|
||||||
|
func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
|
||||||
|
var kiroTools []KiroToolWrapper
|
||||||
|
if !tools.IsArray() {
|
||||||
|
return kiroTools
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tool := range tools.Array() {
|
||||||
|
name := tool.Get("name").String()
|
||||||
|
description := tool.Get("description").String()
|
||||||
|
inputSchema := tool.Get("input_schema").Value()
|
||||||
|
|
||||||
|
// Shorten tool name if it exceeds 64 characters (common with MCP tools)
|
||||||
|
originalName := name
|
||||||
|
name = shortenToolNameIfNeeded(name)
|
||||||
|
if name != originalName {
|
||||||
|
log.Debugf("kiro: shortened tool name from '%s' to '%s'", originalName, name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CRITICAL FIX: Kiro API requires non-empty description
|
||||||
|
if strings.TrimSpace(description) == "" {
|
||||||
|
description = fmt.Sprintf("Tool: %s", name)
|
||||||
|
log.Debugf("kiro: tool '%s' has empty description, using default: %s", name, description)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Truncate long descriptions
|
||||||
|
if len(description) > kirocommon.KiroMaxToolDescLen {
|
||||||
|
truncLen := kirocommon.KiroMaxToolDescLen - 30
|
||||||
|
for truncLen > 0 && !utf8.RuneStart(description[truncLen]) {
|
||||||
|
truncLen--
|
||||||
|
}
|
||||||
|
description = description[:truncLen] + "... (description truncated)"
|
||||||
|
}
|
||||||
|
|
||||||
|
kiroTools = append(kiroTools, KiroToolWrapper{
|
||||||
|
ToolSpecification: KiroToolSpecification{
|
||||||
|
Name: name,
|
||||||
|
Description: description,
|
||||||
|
InputSchema: KiroInputSchema{JSON: inputSchema},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return kiroTools
|
||||||
|
}
|
||||||
|
|
||||||
|
// processMessages processes Claude messages and builds Kiro history.
// It returns the prior-turn history, the current (last) user message, and the
// tool results attached to that last user message. Adjacent same-role
// messages are merged first. If the conversation ends with an assistant
// message, a synthetic "Continue" user message is returned as the current
// message so the request always has one.
func processMessages(messages gjson.Result, modelID, origin string) ([]KiroHistoryMessage, *KiroUserInputMessage, []KiroToolResult) {
	var history []KiroHistoryMessage
	var currentUserMsg *KiroUserInputMessage
	var currentToolResults []KiroToolResult

	// Merge adjacent messages with the same role
	messagesArray := kirocommon.MergeAdjacentMessages(messages.Array())
	for i, msg := range messagesArray {
		role := msg.Get("role").String()
		isLastMessage := i == len(messagesArray)-1

		if role == "user" {
			userMsg, toolResults := BuildUserMessageStruct(msg, modelID, origin)
			if isLastMessage {
				// The final user message becomes the live request payload;
				// its tool results are returned separately.
				currentUserMsg = &userMsg
				currentToolResults = toolResults
			} else {
				// CRITICAL: Kiro API requires content to be non-empty for history messages too
				if strings.TrimSpace(userMsg.Content) == "" {
					if len(toolResults) > 0 {
						userMsg.Content = "Tool results provided."
					} else {
						userMsg.Content = "Continue"
					}
				}
				// For history messages, embed tool results in context
				if len(toolResults) > 0 {
					userMsg.UserInputMessageContext = &KiroUserInputMessageContext{
						ToolResults: toolResults,
					}
				}
				history = append(history, KiroHistoryMessage{
					UserInputMessage: &userMsg,
				})
			}
		} else if role == "assistant" {
			assistantMsg := BuildAssistantMessageStruct(msg)
			if isLastMessage {
				history = append(history, KiroHistoryMessage{
					AssistantResponseMessage: &assistantMsg,
				})
				// Create a "Continue" user message as currentMessage so the
				// outgoing request still carries a user turn.
				currentUserMsg = &KiroUserInputMessage{
					Content: "Continue",
					ModelID: modelID,
					Origin:  origin,
				}
			} else {
				history = append(history, KiroHistoryMessage{
					AssistantResponseMessage: &assistantMsg,
				})
			}
		}
		// Messages with any other role are silently dropped.
	}

	return history, currentUserMsg, currentToolResults
}
|
||||||
|
|
||||||
|
// buildFinalContent builds the final content with system prompt
|
||||||
|
func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResult) string {
|
||||||
|
var contentBuilder strings.Builder
|
||||||
|
|
||||||
|
if systemPrompt != "" {
|
||||||
|
contentBuilder.WriteString("--- SYSTEM PROMPT ---\n")
|
||||||
|
contentBuilder.WriteString(systemPrompt)
|
||||||
|
contentBuilder.WriteString("\n--- END SYSTEM PROMPT ---\n\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
contentBuilder.WriteString(content)
|
||||||
|
finalContent := contentBuilder.String()
|
||||||
|
|
||||||
|
// CRITICAL: Kiro API requires content to be non-empty
|
||||||
|
if strings.TrimSpace(finalContent) == "" {
|
||||||
|
if len(toolResults) > 0 {
|
||||||
|
finalContent = "Tool results provided."
|
||||||
|
} else {
|
||||||
|
finalContent = "Continue"
|
||||||
|
}
|
||||||
|
log.Debugf("kiro: content was empty, using default: %s", finalContent)
|
||||||
|
}
|
||||||
|
|
||||||
|
return finalContent
|
||||||
|
}
|
||||||
|
|
||||||
|
// deduplicateToolResults removes duplicate tool results
|
||||||
|
func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
|
||||||
|
if len(toolResults) == 0 {
|
||||||
|
return toolResults
|
||||||
|
}
|
||||||
|
|
||||||
|
seenIDs := make(map[string]bool)
|
||||||
|
unique := make([]KiroToolResult, 0, len(toolResults))
|
||||||
|
for _, tr := range toolResults {
|
||||||
|
if !seenIDs[tr.ToolUseID] {
|
||||||
|
seenIDs[tr.ToolUseID] = true
|
||||||
|
unique = append(unique, tr)
|
||||||
|
} else {
|
||||||
|
log.Debugf("kiro: skipping duplicate toolResult in currentMessage: %s", tr.ToolUseID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return unique
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractClaudeToolChoiceHint extracts tool_choice from Claude request and returns a system prompt hint.
|
||||||
|
// Claude tool_choice values:
|
||||||
|
// - {"type": "auto"}: Model decides (default, no hint needed)
|
||||||
|
// - {"type": "any"}: Must use at least one tool
|
||||||
|
// - {"type": "tool", "name": "..."}: Must use specific tool
|
||||||
|
func extractClaudeToolChoiceHint(claudeBody []byte) string {
|
||||||
|
toolChoice := gjson.GetBytes(claudeBody, "tool_choice")
|
||||||
|
if !toolChoice.Exists() {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
toolChoiceType := toolChoice.Get("type").String()
|
||||||
|
switch toolChoiceType {
|
||||||
|
case "any":
|
||||||
|
return "[INSTRUCTION: You MUST use at least one of the available tools to respond. Do not respond with text only - always make a tool call.]"
|
||||||
|
case "tool":
|
||||||
|
toolName := toolChoice.Get("name").String()
|
||||||
|
if toolName != "" {
|
||||||
|
return fmt.Sprintf("[INSTRUCTION: You MUST use the tool named '%s' to respond. Do not use any other tool or respond with text only.]", toolName)
|
||||||
|
}
|
||||||
|
case "auto":
|
||||||
|
// Default behavior, no hint needed
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildUserMessageStruct builds a user message and extracts tool results.
// Claude content may be a plain string or an array of typed parts: text parts
// are concatenated into Content, image parts become KiroImage attachments,
// and tool_result parts (deduplicated by tool_use_id, first wins) are
// returned separately.
func BuildUserMessageStruct(msg gjson.Result, modelID, origin string) (KiroUserInputMessage, []KiroToolResult) {
	content := msg.Get("content")
	var contentBuilder strings.Builder
	var toolResults []KiroToolResult
	var images []KiroImage

	// Track seen toolUseIds to deduplicate
	seenToolUseIDs := make(map[string]bool)

	if content.IsArray() {
		for _, part := range content.Array() {
			partType := part.Get("type").String()
			switch partType {
			case "text":
				contentBuilder.WriteString(part.Get("text").String())
			case "image":
				mediaType := part.Get("source.media_type").String()
				data := part.Get("source.data").String()

				// Derive the image format from the media-type subtype
				// (e.g. "image/png" -> "png").
				format := ""
				if idx := strings.LastIndex(mediaType, "/"); idx != -1 {
					format = mediaType[idx+1:]
				}

				// Parts missing either a format or data are skipped.
				if format != "" && data != "" {
					images = append(images, KiroImage{
						Format: format,
						Source: KiroImageSource{
							Bytes: data,
						},
					})
				}
			case "tool_result":
				toolUseID := part.Get("tool_use_id").String()

				// Skip duplicate toolUseIds
				if seenToolUseIDs[toolUseID] {
					log.Debugf("kiro: skipping duplicate tool_result with toolUseId: %s", toolUseID)
					continue
				}
				seenToolUseIDs[toolUseID] = true

				isError := part.Get("is_error").Bool()
				resultContent := part.Get("content")

				// Normalize result content: it may be an array of text parts
				// (or bare strings), or a single string.
				var textContents []KiroTextContent
				if resultContent.IsArray() {
					for _, item := range resultContent.Array() {
						if item.Get("type").String() == "text" {
							textContents = append(textContents, KiroTextContent{Text: item.Get("text").String()})
						} else if item.Type == gjson.String {
							textContents = append(textContents, KiroTextContent{Text: item.String()})
						}
					}
				} else if resultContent.Type == gjson.String {
					textContents = append(textContents, KiroTextContent{Text: resultContent.String()})
				}

				// An empty tool result gets placeholder text so the entry is
				// never content-free.
				if len(textContents) == 0 {
					textContents = append(textContents, KiroTextContent{Text: "Tool use was cancelled by the user"})
				}

				status := "success"
				if isError {
					status = "error"
				}

				toolResults = append(toolResults, KiroToolResult{
					ToolUseID: toolUseID,
					Content:   textContents,
					Status:    status,
				})
			}
		}
	} else {
		// Plain-string content: use it verbatim.
		contentBuilder.WriteString(content.String())
	}

	userMsg := KiroUserInputMessage{
		Content: contentBuilder.String(),
		ModelID: modelID,
		Origin:  origin,
	}

	if len(images) > 0 {
		userMsg.Images = images
	}

	return userMsg, toolResults
}
|
||||||
|
|
||||||
|
// BuildAssistantMessageStruct builds an assistant message with tool uses
|
||||||
|
func BuildAssistantMessageStruct(msg gjson.Result) KiroAssistantResponseMessage {
|
||||||
|
content := msg.Get("content")
|
||||||
|
var contentBuilder strings.Builder
|
||||||
|
var toolUses []KiroToolUse
|
||||||
|
|
||||||
|
if content.IsArray() {
|
||||||
|
for _, part := range content.Array() {
|
||||||
|
partType := part.Get("type").String()
|
||||||
|
switch partType {
|
||||||
|
case "text":
|
||||||
|
contentBuilder.WriteString(part.Get("text").String())
|
||||||
|
case "tool_use":
|
||||||
|
toolUseID := part.Get("id").String()
|
||||||
|
toolName := part.Get("name").String()
|
||||||
|
toolInput := part.Get("input")
|
||||||
|
|
||||||
|
var inputMap map[string]interface{}
|
||||||
|
if toolInput.IsObject() {
|
||||||
|
inputMap = make(map[string]interface{})
|
||||||
|
toolInput.ForEach(func(key, value gjson.Result) bool {
|
||||||
|
inputMap[key.String()] = value.Value()
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
toolUses = append(toolUses, KiroToolUse{
|
||||||
|
ToolUseID: toolUseID,
|
||||||
|
Name: toolName,
|
||||||
|
Input: inputMap,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
contentBuilder.WriteString(content.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
return KiroAssistantResponseMessage{
|
||||||
|
Content: contentBuilder.String(),
|
||||||
|
ToolUses: toolUses,
|
||||||
|
}
|
||||||
|
}
|
||||||
184
internal/translator/kiro/claude/kiro_claude_response.go
Normal file
184
internal/translator/kiro/claude/kiro_claude_response.go
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
// Package claude provides response translation functionality for Kiro API to Claude format.
|
||||||
|
// This package handles the conversion of Kiro API responses into Claude-compatible format,
|
||||||
|
// including support for thinking blocks and tool use.
|
||||||
|
package claude
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Local references to kirocommon constants for thinking block parsing.
// Aliased here so the parsing code in this file can use short names.
var (
	thinkingStartTag = kirocommon.ThinkingStartTag
	thinkingEndTag   = kirocommon.ThinkingEndTag
)
|
||||||
|
|
||||||
|
// BuildClaudeResponse constructs a Claude-compatible response.
|
||||||
|
// Supports tool_use blocks when tools are present in the response.
|
||||||
|
// Supports thinking blocks - parses <thinking> tags and converts to Claude thinking content blocks.
|
||||||
|
// stopReason is passed from upstream; fallback logic applied if empty.
|
||||||
|
func BuildClaudeResponse(content string, toolUses []KiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte {
|
||||||
|
var contentBlocks []map[string]interface{}
|
||||||
|
|
||||||
|
// Extract thinking blocks and text from content
|
||||||
|
if content != "" {
|
||||||
|
blocks := ExtractThinkingFromContent(content)
|
||||||
|
contentBlocks = append(contentBlocks, blocks...)
|
||||||
|
|
||||||
|
// Log if thinking blocks were extracted
|
||||||
|
for _, block := range blocks {
|
||||||
|
if block["type"] == "thinking" {
|
||||||
|
thinkingContent := block["thinking"].(string)
|
||||||
|
log.Infof("kiro: buildClaudeResponse extracted thinking block (len: %d)", len(thinkingContent))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add tool_use blocks
|
||||||
|
for _, toolUse := range toolUses {
|
||||||
|
contentBlocks = append(contentBlocks, map[string]interface{}{
|
||||||
|
"type": "tool_use",
|
||||||
|
"id": toolUse.ToolUseID,
|
||||||
|
"name": toolUse.Name,
|
||||||
|
"input": toolUse.Input,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure at least one content block (Claude API requires non-empty content)
|
||||||
|
if len(contentBlocks) == 0 {
|
||||||
|
contentBlocks = append(contentBlocks, map[string]interface{}{
|
||||||
|
"type": "text",
|
||||||
|
"text": "",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use upstream stopReason; apply fallback logic if not provided
|
||||||
|
if stopReason == "" {
|
||||||
|
stopReason = "end_turn"
|
||||||
|
if len(toolUses) > 0 {
|
||||||
|
stopReason = "tool_use"
|
||||||
|
}
|
||||||
|
log.Debugf("kiro: buildClaudeResponse using fallback stop_reason: %s", stopReason)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log warning if response was truncated due to max_tokens
|
||||||
|
if stopReason == "max_tokens" {
|
||||||
|
log.Warnf("kiro: response truncated due to max_tokens limit (buildClaudeResponse)")
|
||||||
|
}
|
||||||
|
|
||||||
|
response := map[string]interface{}{
|
||||||
|
"id": "msg_" + uuid.New().String()[:24],
|
||||||
|
"type": "message",
|
||||||
|
"role": "assistant",
|
||||||
|
"model": model,
|
||||||
|
"content": contentBlocks,
|
||||||
|
"stop_reason": stopReason,
|
||||||
|
"usage": map[string]interface{}{
|
||||||
|
"input_tokens": usageInfo.InputTokens,
|
||||||
|
"output_tokens": usageInfo.OutputTokens,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
result, _ := json.Marshal(response)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExtractThinkingFromContent parses content to extract thinking blocks and text.
// Returns a list of content blocks in the order they appear in the content.
// Handles interleaved thinking and text blocks correctly. All-whitespace
// segments are dropped; an unterminated <thinking> tag turns the remainder
// into a thinking block.
func ExtractThinkingFromContent(content string) []map[string]interface{} {
	var blocks []map[string]interface{}

	if content == "" {
		return blocks
	}

	// Check if content contains thinking tags at all
	if !strings.Contains(content, thinkingStartTag) {
		// No thinking tags, return as plain text
		return []map[string]interface{}{
			{
				"type": "text",
				"text": content,
			},
		}
	}

	log.Debugf("kiro: extractThinkingFromContent - found thinking tags in content (len: %d)", len(content))

	remaining := content

	for len(remaining) > 0 {
		// Look for <thinking> tag
		startIdx := strings.Index(remaining, thinkingStartTag)

		if startIdx == -1 {
			// No more thinking tags, add remaining as text
			if strings.TrimSpace(remaining) != "" {
				blocks = append(blocks, map[string]interface{}{
					"type": "text",
					"text": remaining,
				})
			}
			break
		}

		// Add text before thinking tag (if any meaningful content)
		if startIdx > 0 {
			textBefore := remaining[:startIdx]
			if strings.TrimSpace(textBefore) != "" {
				blocks = append(blocks, map[string]interface{}{
					"type": "text",
					"text": textBefore,
				})
			}
		}

		// Move past the opening tag
		remaining = remaining[startIdx+len(thinkingStartTag):]

		// Find closing tag
		endIdx := strings.Index(remaining, thinkingEndTag)

		if endIdx == -1 {
			// No closing tag found, treat rest as thinking content (incomplete response)
			if strings.TrimSpace(remaining) != "" {
				blocks = append(blocks, map[string]interface{}{
					"type":     "thinking",
					"thinking": remaining,
				})
				log.Warnf("kiro: extractThinkingFromContent - missing closing </thinking> tag")
			}
			break
		}

		// Extract thinking content between tags
		thinkContent := remaining[:endIdx]
		if strings.TrimSpace(thinkContent) != "" {
			blocks = append(blocks, map[string]interface{}{
				"type":     "thinking",
				"thinking": thinkContent,
			})
			log.Debugf("kiro: extractThinkingFromContent - extracted thinking block (len: %d)", len(thinkContent))
		}

		// Move past the closing tag and keep scanning for further tags.
		remaining = remaining[endIdx+len(thinkingEndTag):]
	}

	// If no blocks were created (all whitespace), return empty text block
	if len(blocks) == 0 {
		blocks = append(blocks, map[string]interface{}{
			"type": "text",
			"text": "",
		})
	}

	return blocks
}
|
||||||
176
internal/translator/kiro/claude/kiro_claude_stream.go
Normal file
176
internal/translator/kiro/claude/kiro_claude_stream.go
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
// Package claude provides streaming SSE event building for Claude format.
|
||||||
|
// This package handles the construction of Claude-compatible Server-Sent Events (SSE)
|
||||||
|
// for streaming responses from Kiro API.
|
||||||
|
package claude
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BuildClaudeMessageStartEvent creates the message_start SSE event
|
||||||
|
func BuildClaudeMessageStartEvent(model string, inputTokens int64) []byte {
|
||||||
|
event := map[string]interface{}{
|
||||||
|
"type": "message_start",
|
||||||
|
"message": map[string]interface{}{
|
||||||
|
"id": "msg_" + uuid.New().String()[:24],
|
||||||
|
"type": "message",
|
||||||
|
"role": "assistant",
|
||||||
|
"content": []interface{}{},
|
||||||
|
"model": model,
|
||||||
|
"stop_reason": nil,
|
||||||
|
"stop_sequence": nil,
|
||||||
|
"usage": map[string]interface{}{"input_tokens": inputTokens, "output_tokens": 0},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
result, _ := json.Marshal(event)
|
||||||
|
return []byte("event: message_start\ndata: " + string(result))
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildClaudeContentBlockStartEvent creates a content_block_start SSE event.
// blockType selects the initial content_block payload: "tool_use" (with the
// given id and name and an empty input object), "thinking" (empty thinking
// string), or any other value for an empty text block.
func BuildClaudeContentBlockStartEvent(index int, blockType, toolUseID, toolName string) []byte {
	block := map[string]interface{}{
		"type": "text",
		"text": "",
	}
	if blockType == "tool_use" {
		block = map[string]interface{}{
			"type":  "tool_use",
			"id":    toolUseID,
			"name":  toolName,
			"input": map[string]interface{}{},
		}
	} else if blockType == "thinking" {
		block = map[string]interface{}{
			"type":     "thinking",
			"thinking": "",
		}
	}

	payload, _ := json.Marshal(map[string]interface{}{
		"type":          "content_block_start",
		"index":         index,
		"content_block": block,
	})
	return []byte("event: content_block_start\ndata: " + string(payload))
}
|
||||||
|
|
||||||
|
// BuildClaudeStreamEvent creates a text_delta content_block_delta SSE event
// carrying contentDelta for the block at the given index.
func BuildClaudeStreamEvent(contentDelta string, index int) []byte {
	delta := map[string]interface{}{
		"type": "text_delta",
		"text": contentDelta,
	}
	payload, _ := json.Marshal(map[string]interface{}{
		"type":  "content_block_delta",
		"index": index,
		"delta": delta,
	})
	return []byte("event: content_block_delta\ndata: " + string(payload))
}
|
||||||
|
|
||||||
|
// BuildClaudeInputJsonDeltaEvent creates an input_json_delta event for tool
// use streaming; partialJSON is a fragment of the tool input being streamed.
func BuildClaudeInputJsonDeltaEvent(partialJSON string, index int) []byte {
	delta := map[string]interface{}{
		"type":         "input_json_delta",
		"partial_json": partialJSON,
	}
	payload, _ := json.Marshal(map[string]interface{}{
		"type":  "content_block_delta",
		"index": index,
		"delta": delta,
	})
	return []byte("event: content_block_delta\ndata: " + string(payload))
}
|
||||||
|
|
||||||
|
// BuildClaudeContentBlockStopEvent creates a content_block_stop SSE event for
// the block at the given index.
func BuildClaudeContentBlockStopEvent(index int) []byte {
	payload, _ := json.Marshal(map[string]interface{}{
		"type":  "content_block_stop",
		"index": index,
	})
	return []byte("event: content_block_stop\ndata: " + string(payload))
}
|
||||||
|
|
||||||
|
// BuildClaudeMessageDeltaEvent creates the message_delta event with stop_reason and usage
|
||||||
|
func BuildClaudeMessageDeltaEvent(stopReason string, usageInfo usage.Detail) []byte {
|
||||||
|
deltaEvent := map[string]interface{}{
|
||||||
|
"type": "message_delta",
|
||||||
|
"delta": map[string]interface{}{
|
||||||
|
"stop_reason": stopReason,
|
||||||
|
"stop_sequence": nil,
|
||||||
|
},
|
||||||
|
"usage": map[string]interface{}{
|
||||||
|
"input_tokens": usageInfo.InputTokens,
|
||||||
|
"output_tokens": usageInfo.OutputTokens,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
deltaResult, _ := json.Marshal(deltaEvent)
|
||||||
|
return []byte("event: message_delta\ndata: " + string(deltaResult))
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildClaudeMessageStopOnlyEvent creates only the message_stop event.
func BuildClaudeMessageStopOnlyEvent() []byte {
	payload, _ := json.Marshal(map[string]interface{}{"type": "message_stop"})
	return []byte("event: message_stop\ndata: " + string(payload))
}
|
||||||
|
|
||||||
|
// BuildClaudePingEventWithUsage creates a ping event with embedded usage
// information. This is used for real-time usage estimation during streaming;
// the "estimated" flag marks the counts as approximations.
func BuildClaudePingEventWithUsage(inputTokens, outputTokens int64) []byte {
	usageMap := map[string]interface{}{
		"input_tokens":  inputTokens,
		"output_tokens": outputTokens,
		"total_tokens":  inputTokens + outputTokens,
		"estimated":     true,
	}
	payload, _ := json.Marshal(map[string]interface{}{
		"type":  "ping",
		"usage": usageMap,
	})
	return []byte("event: ping\ndata: " + string(payload))
}
|
||||||
|
|
||||||
|
// BuildClaudeThinkingDeltaEvent creates a thinking_delta event for Claude API
// compatibility. This is used when streaming thinking content wrapped in
// <thinking> tags.
func BuildClaudeThinkingDeltaEvent(thinkingDelta string, index int) []byte {
	delta := map[string]interface{}{
		"type":     "thinking_delta",
		"thinking": thinkingDelta,
	}
	payload, _ := json.Marshal(map[string]interface{}{
		"type":  "content_block_delta",
		"index": index,
		"delta": delta,
	})
	return []byte("event: content_block_delta\ndata: " + string(payload))
}
|
||||||
|
|
||||||
|
// PendingTagSuffix detects if the buffer ends with a partial prefix of the given tag.
// Returns the length of the partial match (0 if no match).
// Based on amq2api implementation for handling cross-chunk tag boundaries:
// when a tag may be split across stream chunks, the caller can hold back the
// matched suffix until the next chunk arrives.
func PendingTagSuffix(buffer, tag string) int {
	if buffer == "" || tag == "" {
		return 0
	}
	// Only a strictly partial prefix matters, so the probe is capped at
	// len(tag)-1 bytes (and at the buffer length).
	longest := len(tag) - 1
	if longest > len(buffer) {
		longest = len(buffer)
	}
	for n := longest; n > 0; n-- {
		if buffer[len(buffer)-n:] == tag[:n] {
			return n
		}
	}
	return 0
}
|
||||||
522
internal/translator/kiro/claude/kiro_claude_tools.go
Normal file
522
internal/translator/kiro/claude/kiro_claude_tools.go
Normal file
@@ -0,0 +1,522 @@
|
|||||||
|
// Package claude provides tool calling support for Kiro to Claude translation.
|
||||||
|
// This package handles parsing embedded tool calls, JSON repair, and deduplication.
|
||||||
|
package claude
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ToolUseState tracks the state of an in-progress tool use during streaming.
type ToolUseState struct {
	ToolUseID   string          // identifier reported to the client for this tool use
	Name        string          // tool name
	InputBuffer strings.Builder // accumulates the tool input as it streams in — presumably partial JSON; verify against callers
	IsComplete  bool            // set once the tool use has finished streaming
}
|
||||||
|
|
||||||
|
// Pre-compiled regex patterns for performance
var (
	// embeddedToolCallPattern matches [Called tool_name with args: {...}] format.
	// Capture group 1 is the tool name; the match ends just before the args JSON.
	embeddedToolCallPattern = regexp.MustCompile(`\[Called\s+([A-Za-z0-9_.-]+)\s+with\s+args:\s*`)
	// trailingCommaPattern matches trailing commas before closing braces/brackets
	// (presumably consumed by RepairJSON — confirm at its definition).
	trailingCommaPattern = regexp.MustCompile(`,\s*([}\]])`)
)
|
||||||
|
|
||||||
|
// ParseEmbeddedToolCalls extracts [Called tool_name with args: {...}] format from text.
// Kiro sometimes embeds tool calls in text content instead of using toolUseEvent.
// Returns the cleaned text (with tool calls removed) and extracted tool uses.
// processedIDs is keyed by "name:repairedJSON" and suppresses duplicates; it
// may be nil to disable deduplication.
func ParseEmbeddedToolCalls(text string, processedIDs map[string]bool) (string, []KiroToolUse) {
	// Cheap pre-filter before running the regex.
	if !strings.Contains(text, "[Called") {
		return text, nil
	}

	var toolUses []KiroToolUse
	cleanText := text

	// Find all [Called markers
	matches := embeddedToolCallPattern.FindAllStringSubmatchIndex(text, -1)
	if len(matches) == 0 {
		return text, nil
	}

	// Process matches in reverse order to maintain correct indices: each
	// removal shrinks cleanText, so earlier match offsets stay valid.
	// NOTE(review): as a consequence, toolUses is collected in reverse
	// document order — confirm callers don't depend on ordering.
	for i := len(matches) - 1; i >= 0; i-- {
		matchStart := matches[i][0]
		toolNameStart := matches[i][2]
		toolNameEnd := matches[i][3]

		if toolNameStart < 0 || toolNameEnd < 0 {
			continue
		}

		toolName := text[toolNameStart:toolNameEnd]

		// Find the JSON object start (after "with args:")
		jsonStart := matches[i][1]
		if jsonStart >= len(text) {
			continue
		}

		// Skip whitespace to find the opening brace
		for jsonStart < len(text) && (text[jsonStart] == ' ' || text[jsonStart] == '\t') {
			jsonStart++
		}

		if jsonStart >= len(text) || text[jsonStart] != '{' {
			continue
		}

		// Find matching closing bracket
		jsonEnd := findMatchingBracket(text, jsonStart)
		if jsonEnd < 0 {
			continue
		}

		// Extract JSON and find the closing bracket of [Called ...]
		jsonStr := text[jsonStart : jsonEnd+1]

		// Find the closing ] after the JSON
		closingBracket := jsonEnd + 1
		for closingBracket < len(text) && text[closingBracket] != ']' {
			closingBracket++
		}
		if closingBracket >= len(text) {
			continue
		}

		// End index of the full tool call (closing ']' inclusive)
		matchEnd := closingBracket + 1

		// Repair and parse JSON; unparseable calls are skipped (and left in
		// the text).
		repairedJSON := RepairJSON(jsonStr)
		var inputMap map[string]interface{}
		if err := json.Unmarshal([]byte(repairedJSON), &inputMap); err != nil {
			log.Debugf("kiro: failed to parse embedded tool call JSON: %v, raw: %s", err, jsonStr)
			continue
		}

		// Generate unique tool ID
		toolUseID := "toolu_" + uuid.New().String()[:12]

		// Check for duplicates using name+input as key
		dedupeKey := toolName + ":" + repairedJSON
		if processedIDs != nil {
			if processedIDs[dedupeKey] {
				log.Debugf("kiro: skipping duplicate embedded tool call: %s", toolName)
				// Still remove from text even if duplicate
				if matchStart >= 0 && matchEnd <= len(cleanText) && matchStart <= matchEnd {
					cleanText = cleanText[:matchStart] + cleanText[matchEnd:]
				}
				continue
			}
			processedIDs[dedupeKey] = true
		}

		toolUses = append(toolUses, KiroToolUse{
			ToolUseID: toolUseID,
			Name:      toolName,
			Input:     inputMap,
		})

		log.Infof("kiro: extracted embedded tool call: %s (ID: %s)", toolName, toolUseID)

		// Remove from clean text (index-based removal to avoid deleting the wrong occurrence)
		if matchStart >= 0 && matchEnd <= len(cleanText) && matchStart <= matchEnd {
			cleanText = cleanText[:matchStart] + cleanText[matchEnd:]
		}
	}

	return cleanText, toolUses
}
|
||||||
|
|
||||||
|
// findMatchingBracket returns the index of the closing brace/bracket that
// pairs with the opening one at startPos, or -1 when no match exists.
// Nesting is tracked, and structural characters inside quoted JSON strings
// (including backslash-escaped sequences) are ignored.
func findMatchingBracket(text string, startPos int) int {
	if startPos >= len(text) {
		return -1
	}

	opener := text[startPos]
	var closer byte
	switch opener {
	case '{':
		closer = '}'
	case '[':
		closer = ']'
	default:
		// Only object and array openers are supported.
		return -1
	}

	nesting := 1
	quoted := false
	skipNext := false

	for idx := startPos + 1; idx < len(text); idx++ {
		c := text[idx]

		if skipNext {
			// Byte following a backslash inside a string: never structural.
			skipNext = false
			continue
		}

		switch {
		case c == '\\' && quoted:
			skipNext = true
		case c == '"':
			quoted = !quoted
		case quoted:
			// Braces/brackets inside string values do not affect nesting.
		case c == opener:
			nesting++
		case c == closer:
			nesting--
			if nesting == 0 {
				return idx
			}
		}
	}

	// Ran off the end without balancing.
	return -1
}
|
||||||
|
|
||||||
|
// RepairJSON attempts to fix common JSON issues that may occur in tool call arguments.
// Conservative repair strategy:
// 1. First try to parse JSON directly - if valid, return as-is
// 2. Only attempt repair if parsing fails
// 3. After repair, validate the result - if still invalid, return original
//
// Repairs performed (in order): escape raw newlines/tabs inside string
// values, strip trailing commas, truncate at the last structurally valid
// byte, and append any missing closing braces/brackets.
func RepairJSON(jsonString string) string {
	// Handle empty or invalid input: an empty argument payload becomes the
	// empty JSON object so callers can always unmarshal the result.
	if jsonString == "" {
		return "{}"
	}

	str := strings.TrimSpace(jsonString)
	if str == "" {
		return "{}"
	}

	// CONSERVATIVE STRATEGY: First try to parse directly
	var testParse interface{}
	if err := json.Unmarshal([]byte(str), &testParse); err == nil {
		log.Debugf("kiro: repairJSON - JSON is already valid, returning unchanged")
		return str
	}

	log.Debugf("kiro: repairJSON - JSON parse failed, attempting repair")
	// Keep the untouched input so we can fall back to it if repair fails.
	originalStr := str

	// First, escape unescaped newlines/tabs within JSON string values
	str = escapeNewlinesInStrings(str)
	// Remove trailing commas before closing braces/brackets
	// (trailingCommaPattern is a package-level regexp defined elsewhere in
	// this package; "$1" re-emits the captured closing delimiter).
	str = trailingCommaPattern.ReplaceAllString(str, "$1")

	// Calculate bracket balance.
	// The scan ignores characters inside strings and after backslashes;
	// lastValidIndex tracks the last position where the counters had not
	// gone negative (i.e. no closer appeared before its opener).
	braceCount := 0
	bracketCount := 0
	inString := false
	escape := false
	lastValidIndex := -1

	for i := 0; i < len(str); i++ {
		char := str[i]

		if escape {
			escape = false
			continue
		}

		if char == '\\' {
			escape = true
			continue
		}

		if char == '"' {
			inString = !inString
			continue
		}

		if inString {
			continue
		}

		switch char {
		case '{':
			braceCount++
		case '}':
			braceCount--
		case '[':
			bracketCount++
		case ']':
			bracketCount--
		}

		if braceCount >= 0 && bracketCount >= 0 {
			lastValidIndex = i
		}
	}

	// If brackets are unbalanced, try to repair
	if braceCount > 0 || bracketCount > 0 {
		// Truncate only when the last valid byte is strictly inside the
		// string; if the whole input scanned cleanly, truncation is a no-op
		// and we go straight to appending closers.
		if lastValidIndex > 0 && lastValidIndex < len(str)-1 {
			truncated := str[:lastValidIndex+1]
			// Recount brackets after truncation (the counts above describe
			// the full string, not the truncated prefix).
			braceCount = 0
			bracketCount = 0
			inString = false
			escape = false
			for i := 0; i < len(truncated); i++ {
				char := truncated[i]
				if escape {
					escape = false
					continue
				}
				if char == '\\' {
					escape = true
					continue
				}
				if char == '"' {
					inString = !inString
					continue
				}
				if inString {
					continue
				}
				switch char {
				case '{':
					braceCount++
				case '}':
					braceCount--
				case '[':
					bracketCount++
				case ']':
					bracketCount--
				}
			}
			str = truncated
		}

		// Add missing closing brackets
		// NOTE(review): closers are appended braces-first regardless of the
		// original nesting order ({[ becomes {[}] not {[]}); acceptable here
		// because the final validation below rejects a bad interleaving.
		for braceCount > 0 {
			str += "}"
			braceCount--
		}
		for bracketCount > 0 {
			str += "]"
			bracketCount--
		}
	}

	// Validate repaired JSON; if repair did not help, return the caller's
	// original (trimmed) input unchanged.
	if err := json.Unmarshal([]byte(str), &testParse); err != nil {
		log.Warnf("kiro: repairJSON - repair failed to produce valid JSON, returning original")
		return originalStr
	}

	log.Debugf("kiro: repairJSON - successfully repaired JSON")
	return str
}
|
||||||
|
|
||||||
|
// escapeNewlinesInStrings rewrites raw so that literal newline, carriage
// return, and tab bytes occurring inside JSON string values become their
// escaped two-character forms (\n, \r, \t). Bytes outside string values,
// and bytes already preceded by a backslash, are copied through unchanged.
func escapeNewlinesInStrings(raw string) string {
	var out strings.Builder
	out.Grow(len(raw) + 100)

	var inStr, skip bool

	for i := 0; i < len(raw); i++ {
		b := raw[i]

		switch {
		case skip:
			// Byte after a backslash inside a string: part of an existing
			// escape sequence, so emit verbatim.
			out.WriteByte(b)
			skip = false
		case b == '\\' && inStr:
			out.WriteByte(b)
			skip = true
		case b == '"':
			inStr = !inStr
			out.WriteByte(b)
		case inStr && b == '\n':
			out.WriteString(`\n`)
		case inStr && b == '\r':
			out.WriteString(`\r`)
		case inStr && b == '\t':
			out.WriteString(`\t`)
		default:
			out.WriteByte(b)
		}
	}

	return out.String()
}
|
||||||
|
|
||||||
|
// ProcessToolUseEvent handles a toolUseEvent from the Kiro stream.
|
||||||
|
// It accumulates input fragments and emits tool_use blocks when complete.
|
||||||
|
// Returns events to emit and updated state.
|
||||||
|
func ProcessToolUseEvent(event map[string]interface{}, currentToolUse *ToolUseState, processedIDs map[string]bool) ([]KiroToolUse, *ToolUseState) {
|
||||||
|
var toolUses []KiroToolUse
|
||||||
|
|
||||||
|
// Extract from nested toolUseEvent or direct format
|
||||||
|
tu := event
|
||||||
|
if nested, ok := event["toolUseEvent"].(map[string]interface{}); ok {
|
||||||
|
tu = nested
|
||||||
|
}
|
||||||
|
|
||||||
|
toolUseID := kirocommon.GetString(tu, "toolUseId")
|
||||||
|
toolName := kirocommon.GetString(tu, "name")
|
||||||
|
isStop := false
|
||||||
|
if stop, ok := tu["stop"].(bool); ok {
|
||||||
|
isStop = stop
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get input - can be string (fragment) or object (complete)
|
||||||
|
var inputFragment string
|
||||||
|
var inputMap map[string]interface{}
|
||||||
|
|
||||||
|
if inputRaw, ok := tu["input"]; ok {
|
||||||
|
switch v := inputRaw.(type) {
|
||||||
|
case string:
|
||||||
|
inputFragment = v
|
||||||
|
case map[string]interface{}:
|
||||||
|
inputMap = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// New tool use starting
|
||||||
|
if toolUseID != "" && toolName != "" {
|
||||||
|
if currentToolUse != nil && currentToolUse.ToolUseID != toolUseID {
|
||||||
|
log.Warnf("kiro: interleaved tool use detected - new ID %s arrived while %s in progress, completing previous",
|
||||||
|
toolUseID, currentToolUse.ToolUseID)
|
||||||
|
if !processedIDs[currentToolUse.ToolUseID] {
|
||||||
|
incomplete := KiroToolUse{
|
||||||
|
ToolUseID: currentToolUse.ToolUseID,
|
||||||
|
Name: currentToolUse.Name,
|
||||||
|
}
|
||||||
|
if currentToolUse.InputBuffer.Len() > 0 {
|
||||||
|
raw := currentToolUse.InputBuffer.String()
|
||||||
|
repaired := RepairJSON(raw)
|
||||||
|
|
||||||
|
var input map[string]interface{}
|
||||||
|
if err := json.Unmarshal([]byte(repaired), &input); err != nil {
|
||||||
|
log.Warnf("kiro: failed to parse interleaved tool input: %v, raw: %s", err, raw)
|
||||||
|
input = make(map[string]interface{})
|
||||||
|
}
|
||||||
|
incomplete.Input = input
|
||||||
|
}
|
||||||
|
toolUses = append(toolUses, incomplete)
|
||||||
|
processedIDs[currentToolUse.ToolUseID] = true
|
||||||
|
}
|
||||||
|
currentToolUse = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if currentToolUse == nil {
|
||||||
|
if processedIDs != nil && processedIDs[toolUseID] {
|
||||||
|
log.Debugf("kiro: skipping duplicate toolUseEvent: %s", toolUseID)
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
currentToolUse = &ToolUseState{
|
||||||
|
ToolUseID: toolUseID,
|
||||||
|
Name: toolName,
|
||||||
|
}
|
||||||
|
log.Infof("kiro: starting new tool use: %s (ID: %s)", toolName, toolUseID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accumulate input fragments
|
||||||
|
if currentToolUse != nil && inputFragment != "" {
|
||||||
|
currentToolUse.InputBuffer.WriteString(inputFragment)
|
||||||
|
log.Debugf("kiro: accumulated input fragment, total length: %d", currentToolUse.InputBuffer.Len())
|
||||||
|
}
|
||||||
|
|
||||||
|
// If complete input object provided directly
|
||||||
|
if currentToolUse != nil && inputMap != nil {
|
||||||
|
inputBytes, _ := json.Marshal(inputMap)
|
||||||
|
currentToolUse.InputBuffer.Reset()
|
||||||
|
currentToolUse.InputBuffer.Write(inputBytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tool use complete
|
||||||
|
if isStop && currentToolUse != nil {
|
||||||
|
fullInput := currentToolUse.InputBuffer.String()
|
||||||
|
|
||||||
|
// Repair and parse the accumulated JSON
|
||||||
|
repairedJSON := RepairJSON(fullInput)
|
||||||
|
var finalInput map[string]interface{}
|
||||||
|
if err := json.Unmarshal([]byte(repairedJSON), &finalInput); err != nil {
|
||||||
|
log.Warnf("kiro: failed to parse accumulated tool input: %v, raw: %s", err, fullInput)
|
||||||
|
finalInput = make(map[string]interface{})
|
||||||
|
}
|
||||||
|
|
||||||
|
toolUse := KiroToolUse{
|
||||||
|
ToolUseID: currentToolUse.ToolUseID,
|
||||||
|
Name: currentToolUse.Name,
|
||||||
|
Input: finalInput,
|
||||||
|
}
|
||||||
|
toolUses = append(toolUses, toolUse)
|
||||||
|
|
||||||
|
if processedIDs != nil {
|
||||||
|
processedIDs[currentToolUse.ToolUseID] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("kiro: completed tool use: %s (ID: %s)", currentToolUse.Name, currentToolUse.ToolUseID)
|
||||||
|
return toolUses, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return toolUses, currentToolUse
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeduplicateToolUses removes duplicate tool uses based on toolUseId and content.
|
||||||
|
func DeduplicateToolUses(toolUses []KiroToolUse) []KiroToolUse {
|
||||||
|
seenIDs := make(map[string]bool)
|
||||||
|
seenContent := make(map[string]bool)
|
||||||
|
var unique []KiroToolUse
|
||||||
|
|
||||||
|
for _, tu := range toolUses {
|
||||||
|
if seenIDs[tu.ToolUseID] {
|
||||||
|
log.Debugf("kiro: removing ID-duplicate tool use: %s (name: %s)", tu.ToolUseID, tu.Name)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
inputJSON, _ := json.Marshal(tu.Input)
|
||||||
|
contentKey := tu.Name + ":" + string(inputJSON)
|
||||||
|
|
||||||
|
if seenContent[contentKey] {
|
||||||
|
log.Debugf("kiro: removing content-duplicate tool use: %s (id: %s)", tu.Name, tu.ToolUseID)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
seenIDs[tu.ToolUseID] = true
|
||||||
|
seenContent[contentKey] = true
|
||||||
|
unique = append(unique, tu)
|
||||||
|
}
|
||||||
|
|
||||||
|
return unique
|
||||||
|
}
|
||||||
|
|
||||||
75
internal/translator/kiro/common/constants.go
Normal file
75
internal/translator/kiro/common/constants.go
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
// Package common provides shared constants and utilities for Kiro translator.
package common

const (
	// KiroMaxToolDescLen is the maximum description length for Kiro API tools.
	// Kiro API limit is 10240 bytes, leave room for "..."
	// (10240 - 3 = 10237, so a truncation ellipsis still fits.)
	KiroMaxToolDescLen = 10237

	// ThinkingStartTag is the start tag for thinking blocks in responses.
	ThinkingStartTag = "<thinking>"

	// ThinkingEndTag is the end tag for thinking blocks in responses.
	ThinkingEndTag = "</thinking>"

	// CodeFenceMarker is the markdown code fence marker.
	CodeFenceMarker = "```"

	// AltCodeFenceMarker is the alternative markdown code fence marker.
	AltCodeFenceMarker = "~~~"

	// InlineCodeMarker is the markdown inline code marker (backtick).
	InlineCodeMarker = "`"

	// KiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes.
	// AWS Kiro API has a 2-3 minute timeout for large file write operations.
	// The prompt instructs the model to chunk file writes; the text below is
	// sent verbatim to the model and must not be reformatted.
	KiroAgenticSystemPrompt = `
# CRITICAL: CHUNKED WRITE PROTOCOL (MANDATORY)

You MUST follow these rules for ALL file operations. Violation causes server timeouts and task failure.

## ABSOLUTE LIMITS
- **MAXIMUM 350 LINES** per single write/edit operation - NO EXCEPTIONS
- **RECOMMENDED 300 LINES** or less for optimal performance
- **NEVER** write entire files in one operation if >300 lines

## MANDATORY CHUNKED WRITE STRATEGY

### For NEW FILES (>300 lines total):
1. FIRST: Write initial chunk (first 250-300 lines) using write_to_file/fsWrite
2. THEN: Append remaining content in 250-300 line chunks using file append operations
3. REPEAT: Continue appending until complete

### For EDITING EXISTING FILES:
1. Use surgical edits (apply_diff/targeted edits) - change ONLY what's needed
2. NEVER rewrite entire files - use incremental modifications
3. Split large refactors into multiple small, focused edits

### For LARGE CODE GENERATION:
1. Generate in logical sections (imports, types, functions separately)
2. Write each section as a separate operation
3. Use append operations for subsequent sections

## EXAMPLES OF CORRECT BEHAVIOR

✅ CORRECT: Writing a 600-line file
- Operation 1: Write lines 1-300 (initial file creation)
- Operation 2: Append lines 301-600

✅ CORRECT: Editing multiple functions
- Operation 1: Edit function A
- Operation 2: Edit function B
- Operation 3: Edit function C

❌ WRONG: Writing 500 lines in single operation → TIMEOUT
❌ WRONG: Rewriting entire file to change 5 lines → TIMEOUT
❌ WRONG: Generating massive code blocks without chunking → TIMEOUT

## WHY THIS MATTERS
- Server has 2-3 minute timeout for operations
- Large writes exceed timeout and FAIL completely
- Chunked writes are FASTER and more RELIABLE
- Failed writes waste time and require retry

REMEMBER: When in doubt, write LESS per operation. Multiple small operations > one large operation.`
)
|
||||||
125
internal/translator/kiro/common/message_merge.go
Normal file
125
internal/translator/kiro/common/message_merge.go
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
// Package common provides shared utilities for Kiro translators.
|
||||||
|
package common
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MergeAdjacentMessages merges adjacent messages with the same role.
|
||||||
|
// This reduces API call complexity and improves compatibility.
|
||||||
|
// Based on AIClient-2-API implementation.
|
||||||
|
func MergeAdjacentMessages(messages []gjson.Result) []gjson.Result {
|
||||||
|
if len(messages) <= 1 {
|
||||||
|
return messages
|
||||||
|
}
|
||||||
|
|
||||||
|
var merged []gjson.Result
|
||||||
|
for _, msg := range messages {
|
||||||
|
if len(merged) == 0 {
|
||||||
|
merged = append(merged, msg)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
lastMsg := merged[len(merged)-1]
|
||||||
|
currentRole := msg.Get("role").String()
|
||||||
|
lastRole := lastMsg.Get("role").String()
|
||||||
|
|
||||||
|
if currentRole == lastRole {
|
||||||
|
// Merge content from current message into last message
|
||||||
|
mergedContent := mergeMessageContent(lastMsg, msg)
|
||||||
|
// Create a new merged message JSON
|
||||||
|
mergedMsg := createMergedMessage(lastRole, mergedContent)
|
||||||
|
merged[len(merged)-1] = gjson.Parse(mergedMsg)
|
||||||
|
} else {
|
||||||
|
merged = append(merged, msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return merged
|
||||||
|
}
|
||||||
|
|
||||||
|
// mergeMessageContent merges the content of two messages with the same role.
|
||||||
|
// Handles both string content and array content (with text, tool_use, tool_result blocks).
|
||||||
|
func mergeMessageContent(msg1, msg2 gjson.Result) string {
|
||||||
|
content1 := msg1.Get("content")
|
||||||
|
content2 := msg2.Get("content")
|
||||||
|
|
||||||
|
// Extract content blocks from both messages
|
||||||
|
var blocks1, blocks2 []map[string]interface{}
|
||||||
|
|
||||||
|
if content1.IsArray() {
|
||||||
|
for _, block := range content1.Array() {
|
||||||
|
blocks1 = append(blocks1, blockToMap(block))
|
||||||
|
}
|
||||||
|
} else if content1.Type == gjson.String {
|
||||||
|
blocks1 = append(blocks1, map[string]interface{}{
|
||||||
|
"type": "text",
|
||||||
|
"text": content1.String(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if content2.IsArray() {
|
||||||
|
for _, block := range content2.Array() {
|
||||||
|
blocks2 = append(blocks2, blockToMap(block))
|
||||||
|
}
|
||||||
|
} else if content2.Type == gjson.String {
|
||||||
|
blocks2 = append(blocks2, map[string]interface{}{
|
||||||
|
"type": "text",
|
||||||
|
"text": content2.String(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge text blocks if both end/start with text
|
||||||
|
if len(blocks1) > 0 && len(blocks2) > 0 {
|
||||||
|
if blocks1[len(blocks1)-1]["type"] == "text" && blocks2[0]["type"] == "text" {
|
||||||
|
// Merge the last text block of msg1 with the first text block of msg2
|
||||||
|
text1 := blocks1[len(blocks1)-1]["text"].(string)
|
||||||
|
text2 := blocks2[0]["text"].(string)
|
||||||
|
blocks1[len(blocks1)-1]["text"] = text1 + "\n" + text2
|
||||||
|
blocks2 = blocks2[1:] // Remove the merged block from blocks2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combine all blocks
|
||||||
|
allBlocks := append(blocks1, blocks2...)
|
||||||
|
|
||||||
|
// Convert to JSON
|
||||||
|
result, _ := json.Marshal(allBlocks)
|
||||||
|
return string(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// blockToMap converts a gjson.Result block to a map[string]interface{}
|
||||||
|
func blockToMap(block gjson.Result) map[string]interface{} {
|
||||||
|
result := make(map[string]interface{})
|
||||||
|
block.ForEach(func(key, value gjson.Result) bool {
|
||||||
|
if value.IsObject() {
|
||||||
|
result[key.String()] = blockToMap(value)
|
||||||
|
} else if value.IsArray() {
|
||||||
|
var arr []interface{}
|
||||||
|
for _, item := range value.Array() {
|
||||||
|
if item.IsObject() {
|
||||||
|
arr = append(arr, blockToMap(item))
|
||||||
|
} else {
|
||||||
|
arr = append(arr, item.Value())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result[key.String()] = arr
|
||||||
|
} else {
|
||||||
|
result[key.String()] = value.Value()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// createMergedMessage builds the JSON encoding of a message with the given
// role and pre-serialized content. The content argument must already be
// valid JSON; it is embedded verbatim via json.RawMessage.
func createMergedMessage(role string, content string) string {
	encoded, _ := json.Marshal(map[string]interface{}{
		"role":    role,
		"content": json.RawMessage(content),
	})
	return string(encoded)
}
|
||||||
16
internal/translator/kiro/common/utils.go
Normal file
16
internal/translator/kiro/common/utils.go
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
// Package common provides shared constants and utilities for Kiro translator.
|
||||||
|
package common
|
||||||
|
|
||||||
|
// GetString safely extracts a string from a map.
// Returns empty string if the key doesn't exist or the value is not a string.
func GetString(m map[string]interface{}, key string) string {
	// The comma-ok form yields "" for a missing key, a non-string value,
	// or a nil map, so no explicit branching is needed.
	s, _ := m[key].(string)
	return s
}
|
||||||
|
|
||||||
|
// GetStringValue is an alias for GetString for backward compatibility.
// It delegates unchanged; prefer GetString in new code.
func GetStringValue(m map[string]interface{}, key string) string {
	return GetString(m, key)
}
|
||||||
@@ -1,348 +0,0 @@
|
|||||||
// Package chat_completions provides request translation from OpenAI to Kiro format.
|
|
||||||
package chat_completions
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"encoding/json"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/tidwall/gjson"
|
|
||||||
"github.com/tidwall/sjson"
|
|
||||||
)
|
|
||||||
|
|
||||||
// reasoningEffortToBudget maps OpenAI reasoning_effort values to Claude thinking budget_tokens.
|
|
||||||
// OpenAI uses "low", "medium", "high" while Claude uses numeric budget_tokens.
|
|
||||||
var reasoningEffortToBudget = map[string]int{
|
|
||||||
"low": 4000,
|
|
||||||
"medium": 16000,
|
|
||||||
"high": 32000,
|
|
||||||
}
|
|
||||||
|
|
||||||
// ConvertOpenAIRequestToKiro transforms an OpenAI Chat Completions API request into Kiro (Claude) format.
|
|
||||||
// Kiro uses Claude-compatible format internally, so we primarily pass through to Claude format.
|
|
||||||
// Supports tool calling: OpenAI tools -> Claude tools, tool_calls -> tool_use, tool messages -> tool_result.
|
|
||||||
// Supports reasoning/thinking: OpenAI reasoning_effort -> Claude thinking parameter.
|
|
||||||
func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bool) []byte {
|
|
||||||
rawJSON := bytes.Clone(inputRawJSON)
|
|
||||||
root := gjson.ParseBytes(rawJSON)
|
|
||||||
|
|
||||||
// Build Claude-compatible request
|
|
||||||
out := `{"model":"","max_tokens":32000,"messages":[]}`
|
|
||||||
|
|
||||||
// Set model
|
|
||||||
out, _ = sjson.Set(out, "model", modelName)
|
|
||||||
|
|
||||||
// Copy max_tokens if present
|
|
||||||
if v := root.Get("max_tokens"); v.Exists() {
|
|
||||||
out, _ = sjson.Set(out, "max_tokens", v.Int())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy temperature if present
|
|
||||||
if v := root.Get("temperature"); v.Exists() {
|
|
||||||
out, _ = sjson.Set(out, "temperature", v.Float())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy top_p if present
|
|
||||||
if v := root.Get("top_p"); v.Exists() {
|
|
||||||
out, _ = sjson.Set(out, "top_p", v.Float())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle OpenAI reasoning_effort parameter -> Claude thinking parameter
|
|
||||||
// OpenAI format: {"reasoning_effort": "low"|"medium"|"high"}
|
|
||||||
// Claude format: {"thinking": {"type": "enabled", "budget_tokens": N}}
|
|
||||||
if v := root.Get("reasoning_effort"); v.Exists() {
|
|
||||||
effort := v.String()
|
|
||||||
if budget, ok := reasoningEffortToBudget[effort]; ok {
|
|
||||||
thinking := map[string]interface{}{
|
|
||||||
"type": "enabled",
|
|
||||||
"budget_tokens": budget,
|
|
||||||
}
|
|
||||||
out, _ = sjson.Set(out, "thinking", thinking)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Also support direct thinking parameter passthrough (for Claude API compatibility)
|
|
||||||
// Claude format: {"thinking": {"type": "enabled", "budget_tokens": N}}
|
|
||||||
if v := root.Get("thinking"); v.Exists() && v.IsObject() {
|
|
||||||
out, _ = sjson.Set(out, "thinking", v.Value())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert OpenAI tools to Claude tools format
|
|
||||||
if tools := root.Get("tools"); tools.Exists() && tools.IsArray() {
|
|
||||||
claudeTools := make([]interface{}, 0)
|
|
||||||
for _, tool := range tools.Array() {
|
|
||||||
if tool.Get("type").String() == "function" {
|
|
||||||
fn := tool.Get("function")
|
|
||||||
claudeTool := map[string]interface{}{
|
|
||||||
"name": fn.Get("name").String(),
|
|
||||||
"description": fn.Get("description").String(),
|
|
||||||
}
|
|
||||||
// Convert parameters to input_schema
|
|
||||||
if params := fn.Get("parameters"); params.Exists() {
|
|
||||||
claudeTool["input_schema"] = params.Value()
|
|
||||||
} else {
|
|
||||||
claudeTool["input_schema"] = map[string]interface{}{
|
|
||||||
"type": "object",
|
|
||||||
"properties": map[string]interface{}{},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
claudeTools = append(claudeTools, claudeTool)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(claudeTools) > 0 {
|
|
||||||
out, _ = sjson.Set(out, "tools", claudeTools)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process messages
|
|
||||||
messages := root.Get("messages")
|
|
||||||
if messages.Exists() && messages.IsArray() {
|
|
||||||
claudeMessages := make([]interface{}, 0)
|
|
||||||
var systemPrompt string
|
|
||||||
|
|
||||||
// Track pending tool results to merge with next user message
|
|
||||||
var pendingToolResults []map[string]interface{}
|
|
||||||
|
|
||||||
for _, msg := range messages.Array() {
|
|
||||||
role := msg.Get("role").String()
|
|
||||||
content := msg.Get("content")
|
|
||||||
|
|
||||||
if role == "system" {
|
|
||||||
// Extract system message
|
|
||||||
if content.IsArray() {
|
|
||||||
for _, part := range content.Array() {
|
|
||||||
if part.Get("type").String() == "text" {
|
|
||||||
systemPrompt += part.Get("text").String() + "\n"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
systemPrompt = content.String()
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if role == "tool" {
|
|
||||||
// OpenAI tool message -> Claude tool_result content block
|
|
||||||
toolCallID := msg.Get("tool_call_id").String()
|
|
||||||
toolContent := content.String()
|
|
||||||
|
|
||||||
toolResult := map[string]interface{}{
|
|
||||||
"type": "tool_result",
|
|
||||||
"tool_use_id": toolCallID,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle content - can be string or structured
|
|
||||||
if content.IsArray() {
|
|
||||||
contentParts := make([]interface{}, 0)
|
|
||||||
for _, part := range content.Array() {
|
|
||||||
if part.Get("type").String() == "text" {
|
|
||||||
contentParts = append(contentParts, map[string]interface{}{
|
|
||||||
"type": "text",
|
|
||||||
"text": part.Get("text").String(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
toolResult["content"] = contentParts
|
|
||||||
} else {
|
|
||||||
toolResult["content"] = toolContent
|
|
||||||
}
|
|
||||||
|
|
||||||
pendingToolResults = append(pendingToolResults, toolResult)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
claudeMsg := map[string]interface{}{
|
|
||||||
"role": role,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle assistant messages with tool_calls
|
|
||||||
if role == "assistant" && msg.Get("tool_calls").Exists() {
|
|
||||||
contentParts := make([]interface{}, 0)
|
|
||||||
|
|
||||||
// Add text content if present
|
|
||||||
if content.Exists() && content.String() != "" {
|
|
||||||
contentParts = append(contentParts, map[string]interface{}{
|
|
||||||
"type": "text",
|
|
||||||
"text": content.String(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert tool_calls to tool_use blocks
|
|
||||||
for _, toolCall := range msg.Get("tool_calls").Array() {
|
|
||||||
toolUseID := toolCall.Get("id").String()
|
|
||||||
fnName := toolCall.Get("function.name").String()
|
|
||||||
fnArgs := toolCall.Get("function.arguments").String()
|
|
||||||
|
|
||||||
// Parse arguments JSON
|
|
||||||
var argsMap map[string]interface{}
|
|
||||||
if err := json.Unmarshal([]byte(fnArgs), &argsMap); err != nil {
|
|
||||||
argsMap = map[string]interface{}{"raw": fnArgs}
|
|
||||||
}
|
|
||||||
|
|
||||||
contentParts = append(contentParts, map[string]interface{}{
|
|
||||||
"type": "tool_use",
|
|
||||||
"id": toolUseID,
|
|
||||||
"name": fnName,
|
|
||||||
"input": argsMap,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
claudeMsg["content"] = contentParts
|
|
||||||
claudeMessages = append(claudeMessages, claudeMsg)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle user messages - may need to include pending tool results
|
|
||||||
if role == "user" && len(pendingToolResults) > 0 {
|
|
||||||
contentParts := make([]interface{}, 0)
|
|
||||||
|
|
||||||
// Add pending tool results first
|
|
||||||
for _, tr := range pendingToolResults {
|
|
||||||
contentParts = append(contentParts, tr)
|
|
||||||
}
|
|
||||||
pendingToolResults = nil
|
|
||||||
|
|
||||||
// Add user content
|
|
||||||
if content.IsArray() {
|
|
||||||
for _, part := range content.Array() {
|
|
||||||
partType := part.Get("type").String()
|
|
||||||
if partType == "text" {
|
|
||||||
contentParts = append(contentParts, map[string]interface{}{
|
|
||||||
"type": "text",
|
|
||||||
"text": part.Get("text").String(),
|
|
||||||
})
|
|
||||||
} else if partType == "image_url" {
|
|
||||||
imageURL := part.Get("image_url.url").String()
|
|
||||||
|
|
||||||
// Check if it's base64 format (data:image/png;base64,xxxxx)
|
|
||||||
if strings.HasPrefix(imageURL, "data:") {
|
|
||||||
// Parse data URL format
|
|
||||||
// Format: data:image/png;base64,xxxxx
|
|
||||||
commaIdx := strings.Index(imageURL, ",")
|
|
||||||
if commaIdx != -1 {
|
|
||||||
// Extract media_type (e.g., "image/png")
|
|
||||||
header := imageURL[5:commaIdx] // Remove "data:" prefix
|
|
||||||
mediaType := header
|
|
||||||
if semiIdx := strings.Index(header, ";"); semiIdx != -1 {
|
|
||||||
mediaType = header[:semiIdx]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract base64 data
|
|
||||||
base64Data := imageURL[commaIdx+1:]
|
|
||||||
|
|
||||||
contentParts = append(contentParts, map[string]interface{}{
|
|
||||||
"type": "image",
|
|
||||||
"source": map[string]interface{}{
|
|
||||||
"type": "base64",
|
|
||||||
"media_type": mediaType,
|
|
||||||
"data": base64Data,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Regular URL format - keep original logic
|
|
||||||
contentParts = append(contentParts, map[string]interface{}{
|
|
||||||
"type": "image",
|
|
||||||
"source": map[string]interface{}{
|
|
||||||
"type": "url",
|
|
||||||
"url": imageURL,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if content.String() != "" {
|
|
||||||
contentParts = append(contentParts, map[string]interface{}{
|
|
||||||
"type": "text",
|
|
||||||
"text": content.String(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
claudeMsg["content"] = contentParts
|
|
||||||
claudeMessages = append(claudeMessages, claudeMsg)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle regular content
|
|
||||||
if content.IsArray() {
|
|
||||||
contentParts := make([]interface{}, 0)
|
|
||||||
for _, part := range content.Array() {
|
|
||||||
partType := part.Get("type").String()
|
|
||||||
if partType == "text" {
|
|
||||||
contentParts = append(contentParts, map[string]interface{}{
|
|
||||||
"type": "text",
|
|
||||||
"text": part.Get("text").String(),
|
|
||||||
})
|
|
||||||
} else if partType == "image_url" {
|
|
||||||
imageURL := part.Get("image_url.url").String()
|
|
||||||
|
|
||||||
// Check if it's base64 format (data:image/png;base64,xxxxx)
|
|
||||||
if strings.HasPrefix(imageURL, "data:") {
|
|
||||||
// Parse data URL format
|
|
||||||
// Format: data:image/png;base64,xxxxx
|
|
||||||
commaIdx := strings.Index(imageURL, ",")
|
|
||||||
if commaIdx != -1 {
|
|
||||||
// Extract media_type (e.g., "image/png")
|
|
||||||
header := imageURL[5:commaIdx] // Remove "data:" prefix
|
|
||||||
mediaType := header
|
|
||||||
if semiIdx := strings.Index(header, ";"); semiIdx != -1 {
|
|
||||||
mediaType = header[:semiIdx]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract base64 data
|
|
||||||
base64Data := imageURL[commaIdx+1:]
|
|
||||||
|
|
||||||
contentParts = append(contentParts, map[string]interface{}{
|
|
||||||
"type": "image",
|
|
||||||
"source": map[string]interface{}{
|
|
||||||
"type": "base64",
|
|
||||||
"media_type": mediaType,
|
|
||||||
"data": base64Data,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Regular URL format - keep original logic
|
|
||||||
contentParts = append(contentParts, map[string]interface{}{
|
|
||||||
"type": "image",
|
|
||||||
"source": map[string]interface{}{
|
|
||||||
"type": "url",
|
|
||||||
"url": imageURL,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
claudeMsg["content"] = contentParts
|
|
||||||
} else {
|
|
||||||
claudeMsg["content"] = content.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
claudeMessages = append(claudeMessages, claudeMsg)
|
|
||||||
}
|
|
||||||
|
|
||||||
// If there are pending tool results without a following user message,
|
|
||||||
// create a user message with just the tool results
|
|
||||||
if len(pendingToolResults) > 0 {
|
|
||||||
contentParts := make([]interface{}, 0)
|
|
||||||
for _, tr := range pendingToolResults {
|
|
||||||
contentParts = append(contentParts, tr)
|
|
||||||
}
|
|
||||||
claudeMessages = append(claudeMessages, map[string]interface{}{
|
|
||||||
"role": "user",
|
|
||||||
"content": contentParts,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
out, _ = sjson.Set(out, "messages", claudeMessages)
|
|
||||||
|
|
||||||
if systemPrompt != "" {
|
|
||||||
out, _ = sjson.Set(out, "system", systemPrompt)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set stream
|
|
||||||
out, _ = sjson.Set(out, "stream", stream)
|
|
||||||
|
|
||||||
return []byte(out)
|
|
||||||
}
|
|
||||||
@@ -1,404 +0,0 @@
|
|||||||
// Package chat_completions provides response translation from Kiro to OpenAI format.
|
|
||||||
package chat_completions
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"encoding/json"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/google/uuid"
|
|
||||||
"github.com/tidwall/gjson"
|
|
||||||
)
|
|
||||||
|
|
||||||
// ConvertKiroResponseToOpenAI converts Kiro streaming response to OpenAI SSE format.
|
|
||||||
// Handles Claude SSE events: content_block_start, content_block_delta, input_json_delta,
|
|
||||||
// content_block_stop, message_delta, and message_stop.
|
|
||||||
// Input may be in SSE format: "event: xxx\ndata: {...}" or raw JSON.
|
|
||||||
func ConvertKiroResponseToOpenAI(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string {
|
|
||||||
raw := string(rawResponse)
|
|
||||||
var results []string
|
|
||||||
|
|
||||||
// Handle SSE format: extract JSON from "data: " lines
|
|
||||||
// Input format: "event: message_start\ndata: {...}"
|
|
||||||
lines := strings.Split(raw, "\n")
|
|
||||||
for _, line := range lines {
|
|
||||||
line = strings.TrimSpace(line)
|
|
||||||
if strings.HasPrefix(line, "data: ") {
|
|
||||||
jsonPart := strings.TrimPrefix(line, "data: ")
|
|
||||||
chunks := convertClaudeEventToOpenAI(jsonPart, model)
|
|
||||||
results = append(results, chunks...)
|
|
||||||
} else if strings.HasPrefix(line, "{") {
|
|
||||||
// Raw JSON (backward compatibility)
|
|
||||||
chunks := convertClaudeEventToOpenAI(line, model)
|
|
||||||
results = append(results, chunks...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return results
|
|
||||||
}
|
|
||||||
|
|
||||||
// convertClaudeEventToOpenAI converts a single Claude JSON event to OpenAI format
|
|
||||||
func convertClaudeEventToOpenAI(jsonStr string, model string) []string {
|
|
||||||
root := gjson.Parse(jsonStr)
|
|
||||||
var results []string
|
|
||||||
|
|
||||||
eventType := root.Get("type").String()
|
|
||||||
|
|
||||||
switch eventType {
|
|
||||||
case "message_start":
|
|
||||||
// Initial message event - emit initial chunk with role
|
|
||||||
response := map[string]interface{}{
|
|
||||||
"id": "chatcmpl-" + uuid.New().String()[:24],
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"created": time.Now().Unix(),
|
|
||||||
"model": model,
|
|
||||||
"choices": []map[string]interface{}{
|
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"delta": map[string]interface{}{
|
|
||||||
"role": "assistant",
|
|
||||||
"content": "",
|
|
||||||
},
|
|
||||||
"finish_reason": nil,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
result, _ := json.Marshal(response)
|
|
||||||
results = append(results, string(result))
|
|
||||||
return results
|
|
||||||
|
|
||||||
case "content_block_start":
|
|
||||||
// Start of a content block (text or tool_use)
|
|
||||||
blockType := root.Get("content_block.type").String()
|
|
||||||
index := int(root.Get("index").Int())
|
|
||||||
|
|
||||||
if blockType == "tool_use" {
|
|
||||||
// Start of tool_use block
|
|
||||||
toolUseID := root.Get("content_block.id").String()
|
|
||||||
toolName := root.Get("content_block.name").String()
|
|
||||||
|
|
||||||
toolCall := map[string]interface{}{
|
|
||||||
"index": index,
|
|
||||||
"id": toolUseID,
|
|
||||||
"type": "function",
|
|
||||||
"function": map[string]interface{}{
|
|
||||||
"name": toolName,
|
|
||||||
"arguments": "",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
response := map[string]interface{}{
|
|
||||||
"id": "chatcmpl-" + uuid.New().String()[:24],
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"created": time.Now().Unix(),
|
|
||||||
"model": model,
|
|
||||||
"choices": []map[string]interface{}{
|
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"delta": map[string]interface{}{
|
|
||||||
"tool_calls": []map[string]interface{}{toolCall},
|
|
||||||
},
|
|
||||||
"finish_reason": nil,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
result, _ := json.Marshal(response)
|
|
||||||
results = append(results, string(result))
|
|
||||||
}
|
|
||||||
return results
|
|
||||||
|
|
||||||
case "content_block_delta":
|
|
||||||
index := int(root.Get("index").Int())
|
|
||||||
deltaType := root.Get("delta.type").String()
|
|
||||||
|
|
||||||
if deltaType == "text_delta" {
|
|
||||||
// Text content delta
|
|
||||||
contentDelta := root.Get("delta.text").String()
|
|
||||||
if contentDelta != "" {
|
|
||||||
response := map[string]interface{}{
|
|
||||||
"id": "chatcmpl-" + uuid.New().String()[:24],
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"created": time.Now().Unix(),
|
|
||||||
"model": model,
|
|
||||||
"choices": []map[string]interface{}{
|
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"delta": map[string]interface{}{
|
|
||||||
"content": contentDelta,
|
|
||||||
},
|
|
||||||
"finish_reason": nil,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
result, _ := json.Marshal(response)
|
|
||||||
results = append(results, string(result))
|
|
||||||
}
|
|
||||||
} else if deltaType == "thinking_delta" {
|
|
||||||
// Thinking/reasoning content delta - convert to OpenAI reasoning_content format
|
|
||||||
thinkingDelta := root.Get("delta.thinking").String()
|
|
||||||
if thinkingDelta != "" {
|
|
||||||
response := map[string]interface{}{
|
|
||||||
"id": "chatcmpl-" + uuid.New().String()[:24],
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"created": time.Now().Unix(),
|
|
||||||
"model": model,
|
|
||||||
"choices": []map[string]interface{}{
|
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"delta": map[string]interface{}{
|
|
||||||
"reasoning_content": thinkingDelta,
|
|
||||||
},
|
|
||||||
"finish_reason": nil,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
result, _ := json.Marshal(response)
|
|
||||||
results = append(results, string(result))
|
|
||||||
}
|
|
||||||
} else if deltaType == "input_json_delta" {
|
|
||||||
// Tool input delta (streaming arguments)
|
|
||||||
partialJSON := root.Get("delta.partial_json").String()
|
|
||||||
if partialJSON != "" {
|
|
||||||
toolCall := map[string]interface{}{
|
|
||||||
"index": index,
|
|
||||||
"function": map[string]interface{}{
|
|
||||||
"arguments": partialJSON,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
response := map[string]interface{}{
|
|
||||||
"id": "chatcmpl-" + uuid.New().String()[:24],
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"created": time.Now().Unix(),
|
|
||||||
"model": model,
|
|
||||||
"choices": []map[string]interface{}{
|
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"delta": map[string]interface{}{
|
|
||||||
"tool_calls": []map[string]interface{}{toolCall},
|
|
||||||
},
|
|
||||||
"finish_reason": nil,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
result, _ := json.Marshal(response)
|
|
||||||
results = append(results, string(result))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return results
|
|
||||||
|
|
||||||
case "content_block_stop":
|
|
||||||
// End of content block - no output needed for OpenAI format
|
|
||||||
return results
|
|
||||||
|
|
||||||
case "message_delta":
|
|
||||||
// Final message delta with stop_reason and usage
|
|
||||||
stopReason := root.Get("delta.stop_reason").String()
|
|
||||||
if stopReason != "" {
|
|
||||||
finishReason := "stop"
|
|
||||||
if stopReason == "tool_use" {
|
|
||||||
finishReason = "tool_calls"
|
|
||||||
} else if stopReason == "end_turn" {
|
|
||||||
finishReason = "stop"
|
|
||||||
} else if stopReason == "max_tokens" {
|
|
||||||
finishReason = "length"
|
|
||||||
}
|
|
||||||
|
|
||||||
response := map[string]interface{}{
|
|
||||||
"id": "chatcmpl-" + uuid.New().String()[:24],
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"created": time.Now().Unix(),
|
|
||||||
"model": model,
|
|
||||||
"choices": []map[string]interface{}{
|
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"delta": map[string]interface{}{},
|
|
||||||
"finish_reason": finishReason,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract and include usage information from message_delta event
|
|
||||||
usage := root.Get("usage")
|
|
||||||
if usage.Exists() {
|
|
||||||
inputTokens := usage.Get("input_tokens").Int()
|
|
||||||
outputTokens := usage.Get("output_tokens").Int()
|
|
||||||
response["usage"] = map[string]interface{}{
|
|
||||||
"prompt_tokens": inputTokens,
|
|
||||||
"completion_tokens": outputTokens,
|
|
||||||
"total_tokens": inputTokens + outputTokens,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
result, _ := json.Marshal(response)
|
|
||||||
results = append(results, string(result))
|
|
||||||
}
|
|
||||||
return results
|
|
||||||
|
|
||||||
case "message_stop":
|
|
||||||
// End of message - could emit [DONE] marker
|
|
||||||
return results
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: handle raw content for backward compatibility
|
|
||||||
var contentDelta string
|
|
||||||
if delta := root.Get("delta.text"); delta.Exists() {
|
|
||||||
contentDelta = delta.String()
|
|
||||||
} else if content := root.Get("content"); content.Exists() && root.Get("type").String() == "" {
|
|
||||||
contentDelta = content.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
if contentDelta != "" {
|
|
||||||
response := map[string]interface{}{
|
|
||||||
"id": "chatcmpl-" + uuid.New().String()[:24],
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"created": time.Now().Unix(),
|
|
||||||
"model": model,
|
|
||||||
"choices": []map[string]interface{}{
|
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"delta": map[string]interface{}{
|
|
||||||
"content": contentDelta,
|
|
||||||
},
|
|
||||||
"finish_reason": nil,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
result, _ := json.Marshal(response)
|
|
||||||
results = append(results, string(result))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle tool_use content blocks (Claude format) - fallback
|
|
||||||
toolUses := root.Get("delta.tool_use")
|
|
||||||
if !toolUses.Exists() {
|
|
||||||
toolUses = root.Get("tool_use")
|
|
||||||
}
|
|
||||||
if toolUses.Exists() && toolUses.IsObject() {
|
|
||||||
inputJSON := toolUses.Get("input").String()
|
|
||||||
if inputJSON == "" {
|
|
||||||
if inputObj := toolUses.Get("input"); inputObj.Exists() {
|
|
||||||
inputBytes, _ := json.Marshal(inputObj.Value())
|
|
||||||
inputJSON = string(inputBytes)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
toolCall := map[string]interface{}{
|
|
||||||
"index": 0,
|
|
||||||
"id": toolUses.Get("id").String(),
|
|
||||||
"type": "function",
|
|
||||||
"function": map[string]interface{}{
|
|
||||||
"name": toolUses.Get("name").String(),
|
|
||||||
"arguments": inputJSON,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
response := map[string]interface{}{
|
|
||||||
"id": "chatcmpl-" + uuid.New().String()[:24],
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"created": time.Now().Unix(),
|
|
||||||
"model": model,
|
|
||||||
"choices": []map[string]interface{}{
|
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"delta": map[string]interface{}{
|
|
||||||
"tool_calls": []map[string]interface{}{toolCall},
|
|
||||||
},
|
|
||||||
"finish_reason": nil,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
result, _ := json.Marshal(response)
|
|
||||||
results = append(results, string(result))
|
|
||||||
}
|
|
||||||
|
|
||||||
return results
|
|
||||||
}
|
|
||||||
|
|
||||||
// ConvertKiroResponseToOpenAINonStream converts Kiro non-streaming response to OpenAI format.
|
|
||||||
func ConvertKiroResponseToOpenAINonStream(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) string {
|
|
||||||
root := gjson.ParseBytes(rawResponse)
|
|
||||||
|
|
||||||
var content string
|
|
||||||
var reasoningContent string
|
|
||||||
var toolCalls []map[string]interface{}
|
|
||||||
|
|
||||||
contentArray := root.Get("content")
|
|
||||||
if contentArray.IsArray() {
|
|
||||||
for _, item := range contentArray.Array() {
|
|
||||||
itemType := item.Get("type").String()
|
|
||||||
if itemType == "text" {
|
|
||||||
content += item.Get("text").String()
|
|
||||||
} else if itemType == "thinking" {
|
|
||||||
// Extract thinking/reasoning content
|
|
||||||
reasoningContent += item.Get("thinking").String()
|
|
||||||
} else if itemType == "tool_use" {
|
|
||||||
// Convert Claude tool_use to OpenAI tool_calls format
|
|
||||||
inputJSON := item.Get("input").String()
|
|
||||||
if inputJSON == "" {
|
|
||||||
// If input is an object, marshal it
|
|
||||||
if inputObj := item.Get("input"); inputObj.Exists() {
|
|
||||||
inputBytes, _ := json.Marshal(inputObj.Value())
|
|
||||||
inputJSON = string(inputBytes)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
toolCall := map[string]interface{}{
|
|
||||||
"id": item.Get("id").String(),
|
|
||||||
"type": "function",
|
|
||||||
"function": map[string]interface{}{
|
|
||||||
"name": item.Get("name").String(),
|
|
||||||
"arguments": inputJSON,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
toolCalls = append(toolCalls, toolCall)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
content = root.Get("content").String()
|
|
||||||
}
|
|
||||||
|
|
||||||
inputTokens := root.Get("usage.input_tokens").Int()
|
|
||||||
outputTokens := root.Get("usage.output_tokens").Int()
|
|
||||||
|
|
||||||
message := map[string]interface{}{
|
|
||||||
"role": "assistant",
|
|
||||||
"content": content,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add reasoning_content if present (OpenAI reasoning format)
|
|
||||||
if reasoningContent != "" {
|
|
||||||
message["reasoning_content"] = reasoningContent
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add tool_calls if present
|
|
||||||
if len(toolCalls) > 0 {
|
|
||||||
message["tool_calls"] = toolCalls
|
|
||||||
}
|
|
||||||
|
|
||||||
finishReason := "stop"
|
|
||||||
if len(toolCalls) > 0 {
|
|
||||||
finishReason = "tool_calls"
|
|
||||||
}
|
|
||||||
|
|
||||||
response := map[string]interface{}{
|
|
||||||
"id": "chatcmpl-" + uuid.New().String()[:24],
|
|
||||||
"object": "chat.completion",
|
|
||||||
"created": time.Now().Unix(),
|
|
||||||
"model": model,
|
|
||||||
"choices": []map[string]interface{}{
|
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"message": message,
|
|
||||||
"finish_reason": finishReason,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"usage": map[string]interface{}{
|
|
||||||
"prompt_tokens": inputTokens,
|
|
||||||
"completion_tokens": outputTokens,
|
|
||||||
"total_tokens": inputTokens + outputTokens,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
result, _ := json.Marshal(response)
|
|
||||||
return string(result)
|
|
||||||
}
|
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
package chat_completions
|
// Package openai provides translation between OpenAI Chat Completions and Kiro formats.
|
||||||
|
package openai
|
||||||
|
|
||||||
import (
|
import (
|
||||||
. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
|
. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
|
||||||
@@ -8,12 +9,12 @@ import (
|
|||||||
|
|
||||||
// init registers the OpenAI <-> Kiro translation pair with the global translator
// registry: requests are converted OpenAI -> Kiro, and both streaming and
// non-streaming responses are converted back Kiro -> OpenAI.
func init() {
	translator.Register(
		OpenAI, // source format
		Kiro,   // target format
		ConvertOpenAIRequestToKiro,
		interfaces.TranslateResponse{
			Stream:    ConvertKiroStreamToOpenAI,
			NonStream: ConvertKiroNonStreamToOpenAI,
		},
	)
}
|
||||||
369
internal/translator/kiro/openai/kiro_openai.go
Normal file
369
internal/translator/kiro/openai/kiro_openai.go
Normal file
@@ -0,0 +1,369 @@
|
|||||||
|
// Package openai provides translation between OpenAI Chat Completions and Kiro formats.
|
||||||
|
// This package enables direct OpenAI → Kiro translation, bypassing the Claude intermediate layer.
|
||||||
|
//
|
||||||
|
// The Kiro executor generates Claude-compatible SSE format internally, so the streaming response
|
||||||
|
// translation converts from Claude SSE format to OpenAI SSE format.
|
||||||
|
package openai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ConvertKiroStreamToOpenAI converts Kiro streaming response to OpenAI format.
|
||||||
|
// The Kiro executor emits Claude-compatible SSE events, so this function translates
|
||||||
|
// from Claude SSE format to OpenAI SSE format.
|
||||||
|
//
|
||||||
|
// Claude SSE format:
|
||||||
|
// - event: message_start\ndata: {...}
|
||||||
|
// - event: content_block_start\ndata: {...}
|
||||||
|
// - event: content_block_delta\ndata: {...}
|
||||||
|
// - event: content_block_stop\ndata: {...}
|
||||||
|
// - event: message_delta\ndata: {...}
|
||||||
|
// - event: message_stop\ndata: {...}
|
||||||
|
//
|
||||||
|
// OpenAI SSE format:
|
||||||
|
// - data: {"id":"...","object":"chat.completion.chunk",...}
|
||||||
|
// - data: [DONE]
|
||||||
|
func ConvertKiroStreamToOpenAI(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string {
	// Initialize per-stream state on first call; param persists across chunks of
	// the same stream, carrying the chunk ID and tool-call bookkeeping.
	if *param == nil {
		*param = NewOpenAIStreamState(model)
	}
	state := (*param).(*OpenAIStreamState)

	// Parse the Claude SSE event.
	responseStr := string(rawResponse)

	// Handle raw event format (event: xxx\ndata: {...}).
	var eventType string
	var eventData string

	if strings.HasPrefix(responseStr, "event:") {
		// Parse event type from the first line and payload from the second.
		lines := strings.SplitN(responseStr, "\n", 2)
		if len(lines) >= 1 {
			eventType = strings.TrimSpace(strings.TrimPrefix(lines[0], "event:"))
		}
		if len(lines) >= 2 && strings.HasPrefix(lines[1], "data:") {
			eventData = strings.TrimSpace(strings.TrimPrefix(lines[1], "data:"))
		}
	} else if strings.HasPrefix(responseStr, "data:") {
		// Bare data line with no preceding event name.
		eventData = strings.TrimSpace(strings.TrimPrefix(responseStr, "data:"))
	} else {
		// Try to parse as raw JSON.
		eventData = strings.TrimSpace(responseStr)
	}

	if eventData == "" {
		return []string{}
	}

	// Parse the event data as JSON.
	eventJSON := gjson.Parse(eventData)
	if !eventJSON.Exists() {
		return []string{}
	}

	// Determine event type from the JSON "type" field if not already set.
	if eventType == "" {
		eventType = eventJSON.Get("type").String()
	}

	var results []string

	switch eventType {
	case "message_start":
		// Send the first chunk carrying the assistant role.
		firstChunk := BuildOpenAISSEFirstChunk(state)
		results = append(results, firstChunk)

	case "content_block_start":
		// Check block type; only tool_use blocks emit output at start time.
		blockType := eventJSON.Get("content_block.type").String()
		switch blockType {
		case "text":
			// Text block starting - nothing to emit yet.
		case "thinking":
			// Thinking block starting - nothing to emit yet for OpenAI.
		case "tool_use":
			// Tool use block starting: emit the tool-call header chunk and
			// advance the state's tool index for subsequent argument deltas.
			toolUseID := eventJSON.Get("content_block.id").String()
			toolName := eventJSON.Get("content_block.name").String()
			chunk := BuildOpenAISSEToolCallStart(state, toolUseID, toolName)
			results = append(results, chunk)
			state.ToolCallIndex++
		}

	case "content_block_delta":
		deltaType := eventJSON.Get("delta.type").String()
		switch deltaType {
		case "text_delta":
			textDelta := eventJSON.Get("delta.text").String()
			if textDelta != "" {
				chunk := BuildOpenAISSETextDelta(state, textDelta)
				results = append(results, chunk)
			}
		case "thinking_delta":
			// Convert thinking to reasoning_content for o1-style compatibility.
			thinkingDelta := eventJSON.Get("delta.thinking").String()
			if thinkingDelta != "" {
				chunk := BuildOpenAISSEReasoningDelta(state, thinkingDelta)
				results = append(results, chunk)
			}
		case "input_json_delta":
			// Tool call arguments delta.
			partialJSON := eventJSON.Get("delta.partial_json").String()
			if partialJSON != "" {
				// Get the tool index from content block index.
				blockIndex := int(eventJSON.Get("index").Int())
				chunk := BuildOpenAISSEToolCallArgumentsDelta(state, partialJSON, blockIndex-1) // Adjust for 0-based tool index
				results = append(results, chunk)
			}
		}

	case "content_block_stop":
		// Content block ended - nothing to emit for OpenAI.

	case "message_delta":
		// Message delta with stop_reason; translate to an OpenAI finish chunk.
		stopReason := eventJSON.Get("delta.stop_reason").String()
		finishReason := mapKiroStopReasonToOpenAI(stopReason)
		if finishReason != "" {
			chunk := BuildOpenAISSEFinish(state, finishReason)
			results = append(results, chunk)
		}

		// Extract usage if present and emit it as a separate chunk.
		if eventJSON.Get("usage").Exists() {
			inputTokens := eventJSON.Get("usage.input_tokens").Int()
			outputTokens := eventJSON.Get("usage.output_tokens").Int()
			usageInfo := usage.Detail{
				InputTokens:  inputTokens,
				OutputTokens: outputTokens,
				TotalTokens:  inputTokens + outputTokens,
			}
			chunk := BuildOpenAISSEUsage(state, usageInfo)
			results = append(results, chunk)
		}

	case "message_stop":
		// Final event - do NOT emit [DONE] here.
		// The handler layer (openai_handlers.go) will send [DONE] when the stream closes.
		// Emitting [DONE] here would cause duplicate [DONE] markers.

	case "ping":
		// Ping event with usage - optionally emit usage chunk.
		if eventJSON.Get("usage").Exists() {
			inputTokens := eventJSON.Get("usage.input_tokens").Int()
			outputTokens := eventJSON.Get("usage.output_tokens").Int()
			usageInfo := usage.Detail{
				InputTokens:  inputTokens,
				OutputTokens: outputTokens,
				TotalTokens:  inputTokens + outputTokens,
			}
			chunk := BuildOpenAISSEUsage(state, usageInfo)
			results = append(results, chunk)
		}
	}

	return results
}
|
||||||
|
|
||||||
|
// ConvertKiroNonStreamToOpenAI converts Kiro non-streaming response to OpenAI format.
|
||||||
|
// The Kiro executor returns Claude-compatible JSON responses, so this function translates
|
||||||
|
// from Claude format to OpenAI format.
|
||||||
|
func ConvertKiroNonStreamToOpenAI(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) string {
|
||||||
|
// Parse the Claude-format response
|
||||||
|
response := gjson.ParseBytes(rawResponse)
|
||||||
|
|
||||||
|
// Extract content
|
||||||
|
var content string
|
||||||
|
var toolUses []KiroToolUse
|
||||||
|
var stopReason string
|
||||||
|
|
||||||
|
// Get stop_reason
|
||||||
|
stopReason = response.Get("stop_reason").String()
|
||||||
|
|
||||||
|
// Process content blocks
|
||||||
|
contentBlocks := response.Get("content")
|
||||||
|
if contentBlocks.IsArray() {
|
||||||
|
for _, block := range contentBlocks.Array() {
|
||||||
|
blockType := block.Get("type").String()
|
||||||
|
switch blockType {
|
||||||
|
case "text":
|
||||||
|
content += block.Get("text").String()
|
||||||
|
case "thinking":
|
||||||
|
// Skip thinking blocks for OpenAI format (or convert to reasoning_content if needed)
|
||||||
|
case "tool_use":
|
||||||
|
toolUseID := block.Get("id").String()
|
||||||
|
toolName := block.Get("name").String()
|
||||||
|
toolInput := block.Get("input")
|
||||||
|
|
||||||
|
var inputMap map[string]interface{}
|
||||||
|
if toolInput.IsObject() {
|
||||||
|
inputMap = make(map[string]interface{})
|
||||||
|
toolInput.ForEach(func(key, value gjson.Result) bool {
|
||||||
|
inputMap[key.String()] = value.Value()
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
toolUses = append(toolUses, KiroToolUse{
|
||||||
|
ToolUseID: toolUseID,
|
||||||
|
Name: toolName,
|
||||||
|
Input: inputMap,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract usage
|
||||||
|
usageInfo := usage.Detail{
|
||||||
|
InputTokens: response.Get("usage.input_tokens").Int(),
|
||||||
|
OutputTokens: response.Get("usage.output_tokens").Int(),
|
||||||
|
}
|
||||||
|
usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
|
||||||
|
|
||||||
|
// Build OpenAI response
|
||||||
|
openaiResponse := BuildOpenAIResponse(content, toolUses, model, usageInfo, stopReason)
|
||||||
|
return string(openaiResponse)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseClaudeEvent parses a Claude SSE event and returns the event type and data
|
||||||
|
// ParseClaudeEvent splits a raw Claude SSE event into its event type and JSON
// payload. Later "event:" / "data:" lines override earlier ones; either return
// value may be empty when the corresponding line is absent.
func ParseClaudeEvent(rawEvent []byte) (eventType string, eventData []byte) {
	for _, raw := range bytes.Split(rawEvent, []byte("\n")) {
		trimmed := bytes.TrimSpace(raw)
		switch {
		case bytes.HasPrefix(trimmed, []byte("event:")):
			eventType = string(bytes.TrimSpace(trimmed[len("event:"):]))
		case bytes.HasPrefix(trimmed, []byte("data:")):
			eventData = bytes.TrimSpace(trimmed[len("data:"):])
		}
	}
	return eventType, eventData
}
|
||||||
|
|
||||||
|
// ExtractThinkingFromContent parses content to extract thinking blocks.
|
||||||
|
// Returns cleaned content (without thinking tags) and whether thinking was found.
|
||||||
|
func ExtractThinkingFromContent(content string) (string, string, bool) {
|
||||||
|
if !strings.Contains(content, kirocommon.ThinkingStartTag) {
|
||||||
|
return content, "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
var cleanedContent strings.Builder
|
||||||
|
var thinkingContent strings.Builder
|
||||||
|
hasThinking := false
|
||||||
|
remaining := content
|
||||||
|
|
||||||
|
for len(remaining) > 0 {
|
||||||
|
startIdx := strings.Index(remaining, kirocommon.ThinkingStartTag)
|
||||||
|
if startIdx == -1 {
|
||||||
|
cleanedContent.WriteString(remaining)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add content before thinking tag
|
||||||
|
cleanedContent.WriteString(remaining[:startIdx])
|
||||||
|
|
||||||
|
// Move past opening tag
|
||||||
|
remaining = remaining[startIdx+len(kirocommon.ThinkingStartTag):]
|
||||||
|
|
||||||
|
// Find closing tag
|
||||||
|
endIdx := strings.Index(remaining, kirocommon.ThinkingEndTag)
|
||||||
|
if endIdx == -1 {
|
||||||
|
// No closing tag - treat rest as thinking
|
||||||
|
thinkingContent.WriteString(remaining)
|
||||||
|
hasThinking = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract thinking content
|
||||||
|
thinkingContent.WriteString(remaining[:endIdx])
|
||||||
|
hasThinking = true
|
||||||
|
remaining = remaining[endIdx+len(kirocommon.ThinkingEndTag):]
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.TrimSpace(cleanedContent.String()), strings.TrimSpace(thinkingContent.String()), hasThinking
|
||||||
|
}
|
||||||
|
|
||||||
|
// ConvertOpenAIToolsToKiroFormat is a helper that converts OpenAI tools format to Kiro format
|
||||||
|
func ConvertOpenAIToolsToKiroFormat(tools []map[string]interface{}) []KiroToolWrapper {
|
||||||
|
var kiroTools []KiroToolWrapper
|
||||||
|
|
||||||
|
for _, tool := range tools {
|
||||||
|
toolType, _ := tool["type"].(string)
|
||||||
|
if toolType != "function" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
fn, ok := tool["function"].(map[string]interface{})
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
name := kirocommon.GetString(fn, "name")
|
||||||
|
description := kirocommon.GetString(fn, "description")
|
||||||
|
parameters := fn["parameters"]
|
||||||
|
|
||||||
|
if name == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if description == "" {
|
||||||
|
description = "Tool: " + name
|
||||||
|
}
|
||||||
|
|
||||||
|
kiroTools = append(kiroTools, KiroToolWrapper{
|
||||||
|
ToolSpecification: KiroToolSpecification{
|
||||||
|
Name: name,
|
||||||
|
Description: description,
|
||||||
|
InputSchema: KiroInputSchema{JSON: parameters},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return kiroTools
|
||||||
|
}
|
||||||
|
|
||||||
|
// OpenAIStreamParams holds parameters for OpenAI streaming conversion.
type OpenAIStreamParams struct {
	// State is the per-stream OpenAI response state (created per model).
	State *OpenAIStreamState
	// ThinkingState tracks thinking-tag parsing progress across chunks.
	ThinkingState *ThinkingTagState
	// ToolCallsEmitted records which tool-call IDs have already been emitted,
	// presumably to avoid emitting duplicates mid-stream — confirm at call sites.
	ToolCallsEmitted map[string]bool
}
|
||||||
|
|
||||||
|
// NewOpenAIStreamParams creates new streaming parameters
|
||||||
|
func NewOpenAIStreamParams(model string) *OpenAIStreamParams {
|
||||||
|
return &OpenAIStreamParams{
|
||||||
|
State: NewOpenAIStreamState(model),
|
||||||
|
ThinkingState: NewThinkingTagState(),
|
||||||
|
ToolCallsEmitted: make(map[string]bool),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ConvertClaudeToolUseToOpenAI converts a Claude tool_use block into an OpenAI
// tool_calls entry: {"id", "type":"function", "function":{"name","arguments"}}.
// The input map is serialized to a JSON string for the "arguments" field.
func ConvertClaudeToolUseToOpenAI(toolUseID, toolName string, input map[string]interface{}) map[string]interface{} {
	args, _ := json.Marshal(input) // best-effort: on marshal failure arguments is empty

	fn := map[string]interface{}{
		"name":      toolName,
		"arguments": string(args),
	}
	return map[string]interface{}{
		"id":       toolUseID,
		"type":     "function",
		"function": fn,
	}
}
|
||||||
|
|
||||||
|
// LogStreamEvent logs a streaming event for debugging.
// Only the event type and the payload length are logged (at debug level);
// the payload itself is never written to the log.
func LogStreamEvent(eventType, data string) {
	log.Debugf("kiro-openai: stream event type=%s, data_len=%d", eventType, len(data))
}
|
||||||
847
internal/translator/kiro/openai/kiro_openai_request.go
Normal file
847
internal/translator/kiro/openai/kiro_openai_request.go
Normal file
@@ -0,0 +1,847 @@
|
|||||||
|
// Package openai provides request translation from OpenAI Chat Completions to Kiro format.
|
||||||
|
// It handles parsing and transforming OpenAI API requests into the Kiro/Amazon Q API format,
|
||||||
|
// extracting model information, system instructions, message contents, and tool declarations.
|
||||||
|
package openai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Kiro API request structs - reuse from kiroclaude package structure

// KiroPayload is the top-level request structure for Kiro API.
type KiroPayload struct {
	ConversationState KiroConversationState `json:"conversationState"`
	// ProfileArn identifies the Amazon Q profile; omitted from JSON when empty.
	ProfileArn string `json:"profileArn,omitempty"`
	// InferenceConfig carries optional sampling parameters; omitted when nil.
	InferenceConfig *KiroInferenceConfig `json:"inferenceConfig,omitempty"`
}

// KiroInferenceConfig contains inference parameters for the Kiro API.
// Zero-valued fields are omitted from the serialized payload.
type KiroInferenceConfig struct {
	MaxTokens   int     `json:"maxTokens,omitempty"`
	Temperature float64 `json:"temperature,omitempty"`
	TopP        float64 `json:"topP,omitempty"`
}

// KiroConversationState holds the conversation context.
type KiroConversationState struct {
	ChatTriggerType string               `json:"chatTriggerType"` // Required: "MANUAL"
	ConversationID  string               `json:"conversationId"`
	CurrentMessage  KiroCurrentMessage   `json:"currentMessage"`
	History         []KiroHistoryMessage `json:"history,omitempty"`
}

// KiroCurrentMessage wraps the current user message.
type KiroCurrentMessage struct {
	UserInputMessage KiroUserInputMessage `json:"userInputMessage"`
}

// KiroHistoryMessage represents a message in the conversation history.
// Exactly one of the two fields is expected to be non-nil per entry.
type KiroHistoryMessage struct {
	UserInputMessage         *KiroUserInputMessage         `json:"userInputMessage,omitempty"`
	AssistantResponseMessage *KiroAssistantResponseMessage `json:"assistantResponseMessage,omitempty"`
}
|
||||||
|
|
||||||
|
// KiroImage represents an image in Kiro API format.
type KiroImage struct {
	// Format is the image subtype taken from a data-URL media type
	// (e.g. "png" from "image/png").
	Format string          `json:"format"`
	Source KiroImageSource `json:"source"`
}

// KiroImageSource contains the image data.
type KiroImageSource struct {
	Bytes string `json:"bytes"` // base64 encoded image data
}

// KiroUserInputMessage represents a user message.
type KiroUserInputMessage struct {
	Content string `json:"content"`
	ModelID string `json:"modelId"`
	// Origin selects the quota flavor, e.g. "CLI" or "AI_EDITOR" (see normalizeOrigin).
	Origin string      `json:"origin"`
	Images []KiroImage `json:"images,omitempty"`
	// UserInputMessageContext carries tool declarations and tool results, when present.
	UserInputMessageContext *KiroUserInputMessageContext `json:"userInputMessageContext,omitempty"`
}

// KiroUserInputMessageContext contains tool-related context.
type KiroUserInputMessageContext struct {
	ToolResults []KiroToolResult  `json:"toolResults,omitempty"`
	Tools       []KiroToolWrapper `json:"tools,omitempty"`
}

// KiroToolResult represents a tool execution result.
type KiroToolResult struct {
	Content   []KiroTextContent `json:"content"`
	Status    string            `json:"status"` // e.g. "success"
	ToolUseID string            `json:"toolUseId"`
}

// KiroTextContent represents text content.
type KiroTextContent struct {
	Text string `json:"text"`
}
|
||||||
|
|
||||||
|
// KiroToolWrapper wraps a tool specification.
type KiroToolWrapper struct {
	ToolSpecification KiroToolSpecification `json:"toolSpecification"`
}

// KiroToolSpecification defines a tool's schema.
type KiroToolSpecification struct {
	Name        string          `json:"name"`
	Description string          `json:"description"`
	InputSchema KiroInputSchema `json:"inputSchema"`
}

// KiroInputSchema wraps the JSON schema for tool input.
type KiroInputSchema struct {
	// JSON holds the raw JSON-schema value describing the tool's parameters.
	JSON interface{} `json:"json"`
}

// KiroAssistantResponseMessage represents an assistant message.
type KiroAssistantResponseMessage struct {
	Content  string        `json:"content"`
	ToolUses []KiroToolUse `json:"toolUses,omitempty"`
}

// KiroToolUse represents a tool invocation by the assistant.
type KiroToolUse struct {
	ToolUseID string                 `json:"toolUseId"`
	Name      string                 `json:"name"`
	Input     map[string]interface{} `json:"input"`
}
|
||||||
|
|
||||||
|
// ConvertOpenAIRequestToKiro converts an OpenAI Chat Completions request to Kiro format.
// This is the main entry point for request translation.
//
// It deliberately returns the raw OpenAI JSON unchanged: the actual payload
// construction happens later, in the executor, via BuildKiroPayloadFromOpenAI.
// modelName and stream are part of the translator signature but unused here.
func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bool) []byte {
	_ = modelName
	_ = stream
	return inputRawJSON
}
|
||||||
|
|
||||||
|
// BuildKiroPayloadFromOpenAI constructs the Kiro API request payload from OpenAI format.
// Supports tool calling - tools are passed via userInputMessageContext.
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
// isAgentic parameter enables chunked write optimization prompt for -agentic model variants.
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
// Returns the marshaled Kiro payload, or nil when marshaling fails.
func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
	// Extract max_tokens for potential use in inferenceConfig
	// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
	const kiroMaxOutputTokens = 32000
	var maxTokens int64
	if mt := gjson.GetBytes(openaiBody, "max_tokens"); mt.Exists() {
		maxTokens = mt.Int()
		if maxTokens == -1 {
			maxTokens = kiroMaxOutputTokens
			log.Debugf("kiro-openai: max_tokens=-1 converted to %d", kiroMaxOutputTokens)
		}
	}

	// Extract temperature if specified
	var temperature float64
	var hasTemperature bool
	if temp := gjson.GetBytes(openaiBody, "temperature"); temp.Exists() {
		temperature = temp.Float()
		hasTemperature = true
	}

	// Extract top_p if specified
	var topP float64
	var hasTopP bool
	if tp := gjson.GetBytes(openaiBody, "top_p"); tp.Exists() {
		topP = tp.Float()
		hasTopP = true
		log.Debugf("kiro-openai: extracted top_p: %.2f", topP)
	}

	// Normalize origin value for Kiro API compatibility
	origin = normalizeOrigin(origin)
	log.Debugf("kiro-openai: normalized origin value: %s", origin)

	messages := gjson.GetBytes(openaiBody, "messages")

	// For chat-only mode, don't include tools
	var tools gjson.Result
	if !isChatOnly {
		tools = gjson.GetBytes(openaiBody, "tools")
	}

	// Extract system prompt from messages
	systemPrompt := extractSystemPromptFromOpenAI(messages)

	// Inject timestamp context. Each of the following sections appends to
	// systemPrompt in a fixed order: timestamp, agentic prompt, tool_choice
	// hint, response_format hint, thinking hint.
	timestamp := time.Now().Format("2006-01-02 15:04:05 MST")
	timestampContext := fmt.Sprintf("[Context: Current time is %s]", timestamp)
	if systemPrompt != "" {
		systemPrompt = timestampContext + "\n\n" + systemPrompt
	} else {
		systemPrompt = timestampContext
	}
	log.Debugf("kiro-openai: injected timestamp context: %s", timestamp)

	// Inject agentic optimization prompt for -agentic model variants
	if isAgentic {
		if systemPrompt != "" {
			systemPrompt += "\n"
		}
		systemPrompt += kirocommon.KiroAgenticSystemPrompt
	}

	// Handle tool_choice parameter - Kiro doesn't support it natively, so we inject system prompt hints
	// OpenAI tool_choice values: "none", "auto", "required", or {"type":"function","function":{"name":"..."}}
	toolChoiceHint := extractToolChoiceHint(openaiBody)
	if toolChoiceHint != "" {
		if systemPrompt != "" {
			systemPrompt += "\n"
		}
		systemPrompt += toolChoiceHint
		log.Debugf("kiro-openai: injected tool_choice hint into system prompt")
	}

	// Handle response_format parameter - Kiro doesn't support it natively, so we inject system prompt hints
	// OpenAI response_format: {"type": "json_object"} or {"type": "json_schema", "json_schema": {...}}
	responseFormatHint := extractResponseFormatHint(openaiBody)
	if responseFormatHint != "" {
		if systemPrompt != "" {
			systemPrompt += "\n"
		}
		systemPrompt += responseFormatHint
		log.Debugf("kiro-openai: injected response_format hint into system prompt")
	}

	// Check for thinking mode and inject thinking hint
	// Supports OpenAI reasoning_effort parameter and model name hints
	thinkingEnabled, budgetTokens := checkThinkingModeFromOpenAI(openaiBody)
	if thinkingEnabled {
		// Adjust budgetTokens based on max_tokens if not explicitly set by reasoning_effort
		// Use 50% of max_tokens for thinking, with min 8000 and max 24000
		if maxTokens > 0 && budgetTokens == 16000 { // 16000 is the default, meaning not explicitly set
			calculatedBudget := maxTokens / 2
			if calculatedBudget < 8000 {
				calculatedBudget = 8000
			}
			if calculatedBudget > 24000 {
				calculatedBudget = 24000
			}
			budgetTokens = calculatedBudget
			log.Debugf("kiro-openai: budgetTokens calculated from max_tokens: %d (max_tokens=%d)", budgetTokens, maxTokens)
		}

		if systemPrompt != "" {
			systemPrompt += "\n"
		}
		dynamicThinkingHint := fmt.Sprintf("<thinking_mode>interleaved</thinking_mode><max_thinking_length>%d</max_thinking_length>", budgetTokens)
		systemPrompt += dynamicThinkingHint
		log.Debugf("kiro-openai: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
	}

	// Convert OpenAI tools to Kiro format
	kiroTools := convertOpenAIToolsToKiro(tools)

	// Process messages and build history
	history, currentUserMsg, currentToolResults := processOpenAIMessages(messages, modelID, origin)

	// Build content with system prompt
	if currentUserMsg != nil {
		currentUserMsg.Content = buildFinalContent(currentUserMsg.Content, systemPrompt, currentToolResults)

		// Deduplicate currentToolResults
		currentToolResults = deduplicateToolResults(currentToolResults)

		// Build userInputMessageContext with tools and tool results
		if len(kiroTools) > 0 || len(currentToolResults) > 0 {
			currentUserMsg.UserInputMessageContext = &KiroUserInputMessageContext{
				Tools:       kiroTools,
				ToolResults: currentToolResults,
			}
		}
	}

	// Build payload. When no user message exists (e.g. only system messages),
	// fall back to a current message carrying just the system prompt.
	var currentMessage KiroCurrentMessage
	if currentUserMsg != nil {
		currentMessage = KiroCurrentMessage{UserInputMessage: *currentUserMsg}
	} else {
		fallbackContent := ""
		if systemPrompt != "" {
			fallbackContent = "--- SYSTEM PROMPT ---\n" + systemPrompt + "\n--- END SYSTEM PROMPT ---\n"
		}
		currentMessage = KiroCurrentMessage{UserInputMessage: KiroUserInputMessage{
			Content: fallbackContent,
			ModelID: modelID,
			Origin:  origin,
		}}
	}

	// Build inferenceConfig if we have any inference parameters
	var inferenceConfig *KiroInferenceConfig
	if maxTokens > 0 || hasTemperature || hasTopP {
		inferenceConfig = &KiroInferenceConfig{}
		if maxTokens > 0 {
			inferenceConfig.MaxTokens = int(maxTokens)
		}
		if hasTemperature {
			inferenceConfig.Temperature = temperature
		}
		if hasTopP {
			inferenceConfig.TopP = topP
		}
	}

	payload := KiroPayload{
		ConversationState: KiroConversationState{
			ChatTriggerType: "MANUAL",
			ConversationID:  uuid.New().String(),
			CurrentMessage:  currentMessage,
			History:         history,
		},
		ProfileArn:      profileArn,
		InferenceConfig: inferenceConfig,
	}

	result, err := json.Marshal(payload)
	if err != nil {
		log.Debugf("kiro-openai: failed to marshal payload: %v", err)
		return nil
	}

	return result
}
|
||||||
|
|
||||||
|
// normalizeOrigin normalizes origin value for Kiro API compatibility.
// Legacy aliases collapse onto the two values the Kiro API understands:
// "KIRO_CLI" and "AMAZON_Q" become "CLI"; "KIRO_AI_EDITOR" and "KIRO_IDE"
// become "AI_EDITOR". Any other value is passed through unchanged.
func normalizeOrigin(origin string) string {
	switch origin {
	case "KIRO_CLI", "AMAZON_Q":
		return "CLI"
	case "KIRO_AI_EDITOR", "KIRO_IDE":
		return "AI_EDITOR"
	}
	return origin
}
|
||||||
|
|
||||||
|
// extractSystemPromptFromOpenAI extracts system prompt from OpenAI messages
|
||||||
|
func extractSystemPromptFromOpenAI(messages gjson.Result) string {
|
||||||
|
if !messages.IsArray() {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
var systemParts []string
|
||||||
|
for _, msg := range messages.Array() {
|
||||||
|
if msg.Get("role").String() == "system" {
|
||||||
|
content := msg.Get("content")
|
||||||
|
if content.Type == gjson.String {
|
||||||
|
systemParts = append(systemParts, content.String())
|
||||||
|
} else if content.IsArray() {
|
||||||
|
// Handle array content format
|
||||||
|
for _, part := range content.Array() {
|
||||||
|
if part.Get("type").String() == "text" {
|
||||||
|
systemParts = append(systemParts, part.Get("text").String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.Join(systemParts, "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
// shortenToolNameIfNeeded shortens tool names that exceed 64 characters.
// MCP tools often have long names like "mcp__server-name__tool-name".
// This preserves the "mcp__" prefix and last segment when possible.
//
// Fix: truncation now backs off to a UTF-8 rune boundary instead of slicing
// at a raw byte offset, so a multi-byte rune at the cut point can no longer
// produce an invalid-UTF-8 name. This matches the rune-boundary backoff the
// description truncation in convertOpenAIToolsToKiro already performs.
// ASCII names behave exactly as before.
func shortenToolNameIfNeeded(name string) string {
	const limit = 64 // maximum tool-name length accepted, in bytes
	if len(name) <= limit {
		return name
	}

	// For MCP tools, try to preserve prefix and last segment.
	if strings.HasPrefix(name, "mcp__") {
		// "mcp__" itself contains "__", so LastIndex is always > 0 here and
		// picks the separator before the final segment.
		if idx := strings.LastIndex(name, "__"); idx > 0 {
			cand := "mcp__" + name[idx+2:]
			if len(cand) > limit {
				return truncateAtRuneBoundary(cand, limit)
			}
			return cand
		}
	}
	return truncateAtRuneBoundary(name, limit)
}

// truncateAtRuneBoundary cuts s to at most max bytes without splitting a
// multi-byte UTF-8 rune: the cut position backs up until it lands on the
// first byte of a rune.
func truncateAtRuneBoundary(s string, max int) string {
	if len(s) <= max {
		return s
	}
	cut := max
	for cut > 0 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut]
}
|
||||||
|
|
||||||
|
// convertOpenAIToolsToKiro converts OpenAI tools to Kiro format.
// Non-"function" entries and entries without a function object are skipped.
// Tool names longer than 64 bytes are shortened, empty descriptions get a
// placeholder (the Kiro API rejects empty descriptions — see comment below),
// and over-long descriptions are truncated at a rune boundary.
func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper {
	var kiroTools []KiroToolWrapper
	if !tools.IsArray() {
		return kiroTools
	}

	for _, tool := range tools.Array() {
		// OpenAI tools have type "function" with function definition inside
		if tool.Get("type").String() != "function" {
			continue
		}

		fn := tool.Get("function")
		if !fn.Exists() {
			continue
		}

		name := fn.Get("name").String()
		description := fn.Get("description").String()
		parameters := fn.Get("parameters").Value()

		// Shorten tool name if it exceeds 64 characters (common with MCP tools)
		originalName := name
		name = shortenToolNameIfNeeded(name)
		if name != originalName {
			log.Debugf("kiro-openai: shortened tool name from '%s' to '%s'", originalName, name)
		}

		// CRITICAL FIX: Kiro API requires non-empty description
		if strings.TrimSpace(description) == "" {
			description = fmt.Sprintf("Tool: %s", name)
			log.Debugf("kiro-openai: tool '%s' has empty description, using default: %s", name, description)
		}

		// Truncate long descriptions. The cut point backs off to a UTF-8 rune
		// boundary so the truncated text remains valid; 30 bytes are reserved
		// for the "... (description truncated)" suffix.
		if len(description) > kirocommon.KiroMaxToolDescLen {
			truncLen := kirocommon.KiroMaxToolDescLen - 30
			for truncLen > 0 && !utf8.RuneStart(description[truncLen]) {
				truncLen--
			}
			description = description[:truncLen] + "... (description truncated)"
		}

		kiroTools = append(kiroTools, KiroToolWrapper{
			ToolSpecification: KiroToolSpecification{
				Name:        name,
				Description: description,
				InputSchema: KiroInputSchema{JSON: parameters},
			},
		})
	}

	return kiroTools
}
|
||||||
|
|
||||||
|
// processOpenAIMessages processes OpenAI messages and builds Kiro history.
// The last message determines the "current" message: a trailing user message
// is returned as currentUserMsg (with that message's tool results); a trailing
// assistant message is appended to history and a synthetic "Continue" user
// message is returned instead. "tool"-role messages are collected into
// currentToolResults as they are encountered. System messages are skipped
// here (they are extracted separately by extractSystemPromptFromOpenAI).
func processOpenAIMessages(messages gjson.Result, modelID, origin string) ([]KiroHistoryMessage, *KiroUserInputMessage, []KiroToolResult) {
	var history []KiroHistoryMessage
	var currentUserMsg *KiroUserInputMessage
	var currentToolResults []KiroToolResult

	if !messages.IsArray() {
		return history, currentUserMsg, currentToolResults
	}

	// Merge adjacent messages with the same role
	messagesArray := kirocommon.MergeAdjacentMessages(messages.Array())

	// Build tool_call_id to name mapping from assistant messages.
	// NOTE(review): this map is populated but never read below — presumably
	// intended for enriching tool results with tool names; confirm before
	// removing or relying on it.
	toolCallIDToName := make(map[string]string)
	for _, msg := range messagesArray {
		if msg.Get("role").String() == "assistant" {
			toolCalls := msg.Get("tool_calls")
			if toolCalls.IsArray() {
				for _, tc := range toolCalls.Array() {
					if tc.Get("type").String() == "function" {
						id := tc.Get("id").String()
						name := tc.Get("function.name").String()
						if id != "" && name != "" {
							toolCallIDToName[id] = name
						}
					}
				}
			}
		}
	}

	for i, msg := range messagesArray {
		role := msg.Get("role").String()
		isLastMessage := i == len(messagesArray)-1

		switch role {
		case "system":
			// System messages are handled separately via extractSystemPromptFromOpenAI
			continue

		case "user":
			userMsg, toolResults := buildUserMessageFromOpenAI(msg, modelID, origin)
			if isLastMessage {
				currentUserMsg = &userMsg
				// NOTE(review): this assignment replaces any results collected
				// from earlier "tool" messages — confirm that is intended.
				currentToolResults = toolResults
			} else {
				// CRITICAL: Kiro API requires content to be non-empty for history messages
				if strings.TrimSpace(userMsg.Content) == "" {
					if len(toolResults) > 0 {
						userMsg.Content = "Tool results provided."
					} else {
						userMsg.Content = "Continue"
					}
				}
				// For history messages, embed tool results in context
				if len(toolResults) > 0 {
					userMsg.UserInputMessageContext = &KiroUserInputMessageContext{
						ToolResults: toolResults,
					}
				}
				history = append(history, KiroHistoryMessage{
					UserInputMessage: &userMsg,
				})
			}

		case "assistant":
			assistantMsg := buildAssistantMessageFromOpenAI(msg)
			if isLastMessage {
				history = append(history, KiroHistoryMessage{
					AssistantResponseMessage: &assistantMsg,
				})
				// Create a "Continue" user message as currentMessage
				currentUserMsg = &KiroUserInputMessage{
					Content: "Continue",
					ModelID: modelID,
					Origin:  origin,
				}
			} else {
				history = append(history, KiroHistoryMessage{
					AssistantResponseMessage: &assistantMsg,
				})
			}

		case "tool":
			// Tool messages in OpenAI format provide results for tool_calls
			// These are typically followed by user or assistant messages
			// Process them and merge into the next user message's tool results
			toolCallID := msg.Get("tool_call_id").String()
			content := msg.Get("content").String()

			if toolCallID != "" {
				toolResult := KiroToolResult{
					ToolUseID: toolCallID,
					Content:   []KiroTextContent{{Text: content}},
					Status:    "success",
				}
				// Tool results should be included in the next user message
				// For now, collect them and they'll be handled when we build the current message
				currentToolResults = append(currentToolResults, toolResult)
			}
		}
	}

	return history, currentUserMsg, currentToolResults
}
|
||||||
|
|
||||||
|
// buildUserMessageFromOpenAI builds a user message from OpenAI format and extracts tool results.
// String content is used verbatim; array content concatenates "text" parts and
// converts base64 data-URL "image_url" parts into Kiro image attachments.
// NOTE(review): the returned toolResults slice is never populated in this
// function — tool results actually come from "tool"-role messages handled in
// processOpenAIMessages. The second return value exists for interface shape.
func buildUserMessageFromOpenAI(msg gjson.Result, modelID, origin string) (KiroUserInputMessage, []KiroToolResult) {
	content := msg.Get("content")
	var contentBuilder strings.Builder
	var toolResults []KiroToolResult
	var images []KiroImage

	// Track seen toolCallIds to deduplicate
	// NOTE(review): currently unused (see the discard below).
	seenToolCallIDs := make(map[string]bool)

	if content.IsArray() {
		for _, part := range content.Array() {
			partType := part.Get("type").String()
			switch partType {
			case "text":
				contentBuilder.WriteString(part.Get("text").String())
			case "image_url":
				imageURL := part.Get("image_url.url").String()
				// Only inline base64 data URLs are supported; remote URLs are dropped.
				if strings.HasPrefix(imageURL, "data:") {
					// Parse data URL: data:image/png;base64,xxxxx
					if idx := strings.Index(imageURL, ";base64,"); idx != -1 {
						mediaType := imageURL[5:idx] // Skip "data:"
						data := imageURL[idx+8:]     // Skip ";base64,"

						// The Kiro format field is the media-type subtype (e.g. "png").
						format := ""
						if lastSlash := strings.LastIndex(mediaType, "/"); lastSlash != -1 {
							format = mediaType[lastSlash+1:]
						}

						if format != "" && data != "" {
							images = append(images, KiroImage{
								Format: format,
								Source: KiroImageSource{
									Bytes: data,
								},
							})
						}
					}
				}
			}
		}
	} else if content.Type == gjson.String {
		contentBuilder.WriteString(content.String())
	}

	// Check for tool_calls in the message (shouldn't be in user messages, but handle edge cases)
	_ = seenToolCallIDs // Used for deduplication if needed

	userMsg := KiroUserInputMessage{
		Content: contentBuilder.String(),
		ModelID: modelID,
		Origin:  origin,
	}

	if len(images) > 0 {
		userMsg.Images = images
	}

	return userMsg, toolResults
}
|
||||||
|
|
||||||
|
// buildAssistantMessageFromOpenAI builds an assistant message from OpenAI format.
// Text content (a plain string or array-of-parts with "text" entries) is
// concatenated, and each "function" tool_call becomes a KiroToolUse with its
// JSON arguments decoded into a map. Unparsable arguments degrade to an empty
// input map rather than failing the conversion.
func buildAssistantMessageFromOpenAI(msg gjson.Result) KiroAssistantResponseMessage {
	content := msg.Get("content")
	var contentBuilder strings.Builder
	var toolUses []KiroToolUse

	// Handle content
	if content.Type == gjson.String {
		contentBuilder.WriteString(content.String())
	} else if content.IsArray() {
		for _, part := range content.Array() {
			if part.Get("type").String() == "text" {
				contentBuilder.WriteString(part.Get("text").String())
			}
		}
	}

	// Handle tool_calls
	toolCalls := msg.Get("tool_calls")
	if toolCalls.IsArray() {
		for _, tc := range toolCalls.Array() {
			if tc.Get("type").String() != "function" {
				continue
			}

			toolUseID := tc.Get("id").String()
			toolName := tc.Get("function.name").String()
			toolArgs := tc.Get("function.arguments").String()

			// OpenAI delivers arguments as a JSON-encoded string; decode it
			// into a map for the Kiro "input" field.
			var inputMap map[string]interface{}
			if err := json.Unmarshal([]byte(toolArgs), &inputMap); err != nil {
				log.Debugf("kiro-openai: failed to parse tool arguments: %v", err)
				inputMap = make(map[string]interface{})
			}

			toolUses = append(toolUses, KiroToolUse{
				ToolUseID: toolUseID,
				Name:      toolName,
				Input:     inputMap,
			})
		}
	}

	return KiroAssistantResponseMessage{
		Content:  contentBuilder.String(),
		ToolUses: toolUses,
	}
}
|
||||||
|
|
||||||
|
// buildFinalContent builds the final content with system prompt
|
||||||
|
func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResult) string {
|
||||||
|
var contentBuilder strings.Builder
|
||||||
|
|
||||||
|
if systemPrompt != "" {
|
||||||
|
contentBuilder.WriteString("--- SYSTEM PROMPT ---\n")
|
||||||
|
contentBuilder.WriteString(systemPrompt)
|
||||||
|
contentBuilder.WriteString("\n--- END SYSTEM PROMPT ---\n\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
contentBuilder.WriteString(content)
|
||||||
|
finalContent := contentBuilder.String()
|
||||||
|
|
||||||
|
// CRITICAL: Kiro API requires content to be non-empty
|
||||||
|
if strings.TrimSpace(finalContent) == "" {
|
||||||
|
if len(toolResults) > 0 {
|
||||||
|
finalContent = "Tool results provided."
|
||||||
|
} else {
|
||||||
|
finalContent = "Continue"
|
||||||
|
}
|
||||||
|
log.Debugf("kiro-openai: content was empty, using default: %s", finalContent)
|
||||||
|
}
|
||||||
|
|
||||||
|
return finalContent
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkThinkingModeFromOpenAI checks if thinking mode is enabled in the OpenAI request.
// Returns (thinkingEnabled, budgetTokens).
// Supports:
// - reasoning_effort parameter (low/medium/high/auto)
// - Model name containing "thinking" or "reason"
// - <thinking_mode> tag in system prompt (AMP/Cursor format)
//
// Checks run in priority order: reasoning_effort first, then the AMP/Cursor
// tag, then the model-name hint; the first match wins and returns immediately.
func checkThinkingModeFromOpenAI(openaiBody []byte) (bool, int64) {
	var budgetTokens int64 = 16000 // Default budget

	// Check OpenAI format: reasoning_effort parameter
	// Valid values: "low", "medium", "high", "auto" (not "none")
	reasoningEffort := gjson.GetBytes(openaiBody, "reasoning_effort")
	if reasoningEffort.Exists() {
		effort := reasoningEffort.String()
		if effort != "" && effort != "none" {
			log.Debugf("kiro-openai: thinking mode enabled via reasoning_effort: %s", effort)
			// Adjust budget based on effort level; unrecognized values keep the default.
			switch effort {
			case "low":
				budgetTokens = 8000
			case "medium":
				budgetTokens = 16000
			case "high":
				budgetTokens = 32000
			case "auto":
				budgetTokens = 16000
			}
			return true, budgetTokens
		}
	}

	// Check AMP/Cursor format: <thinking_mode>interleaved</thinking_mode> in system prompt
	// NOTE(review): this scans the whole raw JSON body, not just the system
	// prompt field, so a tag appearing anywhere in the request matches —
	// confirm this is intended.
	bodyStr := string(openaiBody)
	if strings.Contains(bodyStr, "<thinking_mode>") && strings.Contains(bodyStr, "</thinking_mode>") {
		startTag := "<thinking_mode>"
		endTag := "</thinking_mode>"
		startIdx := strings.Index(bodyStr, startTag)
		if startIdx >= 0 {
			startIdx += len(startTag)
			endIdx := strings.Index(bodyStr[startIdx:], endTag)
			if endIdx >= 0 {
				thinkingMode := bodyStr[startIdx : startIdx+endIdx]
				if thinkingMode == "interleaved" || thinkingMode == "enabled" {
					log.Debugf("kiro-openai: thinking mode enabled via AMP/Cursor format: %s", thinkingMode)
					// Try to extract max_thinking_length if present.
					// Sscanf leaves budgetTokens untouched on parse failure.
					if maxLenStart := strings.Index(bodyStr, "<max_thinking_length>"); maxLenStart >= 0 {
						maxLenStart += len("<max_thinking_length>")
						if maxLenEnd := strings.Index(bodyStr[maxLenStart:], "</max_thinking_length>"); maxLenEnd >= 0 {
							maxLenStr := bodyStr[maxLenStart : maxLenStart+maxLenEnd]
							if parsed, err := fmt.Sscanf(maxLenStr, "%d", &budgetTokens); err == nil && parsed == 1 {
								log.Debugf("kiro-openai: extracted max_thinking_length: %d", budgetTokens)
							}
						}
					}
					return true, budgetTokens
				}
			}
		}
	}

	// Check model name for thinking hints
	model := gjson.GetBytes(openaiBody, "model").String()
	modelLower := strings.ToLower(model)
	if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") {
		log.Debugf("kiro-openai: thinking mode enabled via model name hint: %s", model)
		return true, budgetTokens
	}

	log.Debugf("kiro-openai: no thinking mode detected in OpenAI request")
	return false, budgetTokens
}
|
||||||
|
|
||||||
|
// extractToolChoiceHint extracts tool_choice from OpenAI request and returns a system prompt hint.
|
||||||
|
// OpenAI tool_choice values:
|
||||||
|
// - "none": Don't use any tools
|
||||||
|
// - "auto": Model decides (default, no hint needed)
|
||||||
|
// - "required": Must use at least one tool
|
||||||
|
// - {"type":"function","function":{"name":"..."}} : Must use specific tool
|
||||||
|
func extractToolChoiceHint(openaiBody []byte) string {
|
||||||
|
toolChoice := gjson.GetBytes(openaiBody, "tool_choice")
|
||||||
|
if !toolChoice.Exists() {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle string values
|
||||||
|
if toolChoice.Type == gjson.String {
|
||||||
|
switch toolChoice.String() {
|
||||||
|
case "none":
|
||||||
|
// Note: When tool_choice is "none", we should ideally not pass tools at all
|
||||||
|
// But since we can't modify tool passing here, we add a strong hint
|
||||||
|
return "[INSTRUCTION: Do NOT use any tools. Respond with text only.]"
|
||||||
|
case "required":
|
||||||
|
return "[INSTRUCTION: You MUST use at least one of the available tools to respond. Do not respond with text only - always make a tool call.]"
|
||||||
|
case "auto":
|
||||||
|
// Default behavior, no hint needed
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle object value: {"type":"function","function":{"name":"..."}}
|
||||||
|
if toolChoice.IsObject() {
|
||||||
|
if toolChoice.Get("type").String() == "function" {
|
||||||
|
toolName := toolChoice.Get("function.name").String()
|
||||||
|
if toolName != "" {
|
||||||
|
return fmt.Sprintf("[INSTRUCTION: You MUST use the tool named '%s' to respond. Do not use any other tool or respond with text only.]", toolName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractResponseFormatHint extracts response_format from OpenAI request and returns a system prompt hint.
|
||||||
|
// OpenAI response_format values:
|
||||||
|
// - {"type": "text"}: Default, no hint needed
|
||||||
|
// - {"type": "json_object"}: Must respond with valid JSON
|
||||||
|
// - {"type": "json_schema", "json_schema": {...}}: Must respond with JSON matching schema
|
||||||
|
func extractResponseFormatHint(openaiBody []byte) string {
|
||||||
|
responseFormat := gjson.GetBytes(openaiBody, "response_format")
|
||||||
|
if !responseFormat.Exists() {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
formatType := responseFormat.Get("type").String()
|
||||||
|
switch formatType {
|
||||||
|
case "json_object":
|
||||||
|
return "[INSTRUCTION: You MUST respond with valid JSON only. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]"
|
||||||
|
case "json_schema":
|
||||||
|
// Extract schema if provided
|
||||||
|
schema := responseFormat.Get("json_schema.schema")
|
||||||
|
if schema.Exists() {
|
||||||
|
schemaStr := schema.Raw
|
||||||
|
// Truncate if too long
|
||||||
|
if len(schemaStr) > 500 {
|
||||||
|
schemaStr = schemaStr[:500] + "..."
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("[INSTRUCTION: You MUST respond with valid JSON that matches this schema: %s. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]", schemaStr)
|
||||||
|
}
|
||||||
|
return "[INSTRUCTION: You MUST respond with valid JSON only. Do not include any text before or after the JSON. Do not wrap the JSON in markdown code blocks. Output raw JSON directly.]"
|
||||||
|
case "text":
|
||||||
|
// Default behavior, no hint needed
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// deduplicateToolResults removes duplicate tool results
|
||||||
|
func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
|
||||||
|
if len(toolResults) == 0 {
|
||||||
|
return toolResults
|
||||||
|
}
|
||||||
|
|
||||||
|
seenIDs := make(map[string]bool)
|
||||||
|
unique := make([]KiroToolResult, 0, len(toolResults))
|
||||||
|
for _, tr := range toolResults {
|
||||||
|
if !seenIDs[tr.ToolUseID] {
|
||||||
|
seenIDs[tr.ToolUseID] = true
|
||||||
|
unique = append(unique, tr)
|
||||||
|
} else {
|
||||||
|
log.Debugf("kiro-openai: skipping duplicate toolResult: %s", tr.ToolUseID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return unique
|
||||||
|
}
|
||||||
264
internal/translator/kiro/openai/kiro_openai_response.go
Normal file
264
internal/translator/kiro/openai/kiro_openai_response.go
Normal file
@@ -0,0 +1,264 @@
|
|||||||
|
// Package openai provides response translation from Kiro to OpenAI format.
|
||||||
|
// This package handles the conversion of Kiro API responses into OpenAI Chat Completions-compatible
|
||||||
|
// JSON format, transforming streaming events and non-streaming responses.
|
||||||
|
package openai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
// Incremented atomically by GenerateToolCallID; it is never reset, so IDs stay
// unique for the lifetime of the process.
var functionCallIDCounter uint64
|
||||||
|
|
||||||
|
// BuildOpenAIResponse constructs an OpenAI Chat Completions-compatible response.
|
||||||
|
// Supports tool_calls when tools are present in the response.
|
||||||
|
// stopReason is passed from upstream; fallback logic applied if empty.
|
||||||
|
func BuildOpenAIResponse(content string, toolUses []KiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte {
|
||||||
|
// Build the message object
|
||||||
|
message := map[string]interface{}{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": content,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add tool_calls if present
|
||||||
|
if len(toolUses) > 0 {
|
||||||
|
var toolCalls []map[string]interface{}
|
||||||
|
for i, tu := range toolUses {
|
||||||
|
inputJSON, _ := json.Marshal(tu.Input)
|
||||||
|
toolCalls = append(toolCalls, map[string]interface{}{
|
||||||
|
"id": tu.ToolUseID,
|
||||||
|
"type": "function",
|
||||||
|
"index": i,
|
||||||
|
"function": map[string]interface{}{
|
||||||
|
"name": tu.Name,
|
||||||
|
"arguments": string(inputJSON),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
message["tool_calls"] = toolCalls
|
||||||
|
// When tool_calls are present, content should be null according to OpenAI spec
|
||||||
|
if content == "" {
|
||||||
|
message["content"] = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use upstream stopReason; apply fallback logic if not provided
|
||||||
|
finishReason := mapKiroStopReasonToOpenAI(stopReason)
|
||||||
|
if finishReason == "" {
|
||||||
|
finishReason = "stop"
|
||||||
|
if len(toolUses) > 0 {
|
||||||
|
finishReason = "tool_calls"
|
||||||
|
}
|
||||||
|
log.Debugf("kiro-openai: buildOpenAIResponse using fallback finish_reason: %s", finishReason)
|
||||||
|
}
|
||||||
|
|
||||||
|
response := map[string]interface{}{
|
||||||
|
"id": "chatcmpl-" + uuid.New().String()[:24],
|
||||||
|
"object": "chat.completion",
|
||||||
|
"created": time.Now().Unix(),
|
||||||
|
"model": model,
|
||||||
|
"choices": []map[string]interface{}{
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"message": message,
|
||||||
|
"finish_reason": finishReason,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"usage": map[string]interface{}{
|
||||||
|
"prompt_tokens": usageInfo.InputTokens,
|
||||||
|
"completion_tokens": usageInfo.OutputTokens,
|
||||||
|
"total_tokens": usageInfo.InputTokens + usageInfo.OutputTokens,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
result, _ := json.Marshal(response)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// mapKiroStopReasonToOpenAI converts a Kiro/Claude stop_reason into the
// corresponding OpenAI finish_reason. Unknown values (including the empty
// string) pass through unchanged so the caller can apply fallback logic.
func mapKiroStopReasonToOpenAI(stopReason string) string {
	translations := map[string]string{
		"end_turn":         "stop",
		"stop_sequence":    "stop",
		"tool_use":         "tool_calls",
		"max_tokens":       "length",
		"content_filtered": "content_filter",
	}
	if mapped, ok := translations[stopReason]; ok {
		return mapped
	}
	return stopReason
}
|
||||||
|
|
||||||
|
// BuildOpenAIStreamChunk constructs an OpenAI Chat Completions streaming chunk.
|
||||||
|
// This is the delta format used in streaming responses.
|
||||||
|
func BuildOpenAIStreamChunk(model string, deltaContent string, deltaToolCalls []map[string]interface{}, finishReason string, index int) []byte {
|
||||||
|
delta := map[string]interface{}{}
|
||||||
|
|
||||||
|
// First chunk should include role
|
||||||
|
if index == 0 && deltaContent == "" && len(deltaToolCalls) == 0 {
|
||||||
|
delta["role"] = "assistant"
|
||||||
|
delta["content"] = ""
|
||||||
|
} else if deltaContent != "" {
|
||||||
|
delta["content"] = deltaContent
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add tool_calls delta if present
|
||||||
|
if len(deltaToolCalls) > 0 {
|
||||||
|
delta["tool_calls"] = deltaToolCalls
|
||||||
|
}
|
||||||
|
|
||||||
|
choice := map[string]interface{}{
|
||||||
|
"index": 0,
|
||||||
|
"delta": delta,
|
||||||
|
}
|
||||||
|
|
||||||
|
if finishReason != "" {
|
||||||
|
choice["finish_reason"] = finishReason
|
||||||
|
} else {
|
||||||
|
choice["finish_reason"] = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk := map[string]interface{}{
|
||||||
|
"id": "chatcmpl-" + uuid.New().String()[:12],
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": time.Now().Unix(),
|
||||||
|
"model": model,
|
||||||
|
"choices": []map[string]interface{}{choice},
|
||||||
|
}
|
||||||
|
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAIStreamChunkWithToolCallStart creates a stream chunk for tool call start
|
||||||
|
func BuildOpenAIStreamChunkWithToolCallStart(model string, toolUseID, toolName string, toolIndex int) []byte {
|
||||||
|
toolCall := map[string]interface{}{
|
||||||
|
"index": toolIndex,
|
||||||
|
"id": toolUseID,
|
||||||
|
"type": "function",
|
||||||
|
"function": map[string]interface{}{
|
||||||
|
"name": toolName,
|
||||||
|
"arguments": "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
delta := map[string]interface{}{
|
||||||
|
"tool_calls": []map[string]interface{}{toolCall},
|
||||||
|
}
|
||||||
|
|
||||||
|
choice := map[string]interface{}{
|
||||||
|
"index": 0,
|
||||||
|
"delta": delta,
|
||||||
|
"finish_reason": nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk := map[string]interface{}{
|
||||||
|
"id": "chatcmpl-" + uuid.New().String()[:12],
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": time.Now().Unix(),
|
||||||
|
"model": model,
|
||||||
|
"choices": []map[string]interface{}{choice},
|
||||||
|
}
|
||||||
|
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAIStreamChunkWithToolCallDelta creates a stream chunk for tool call arguments delta
|
||||||
|
func BuildOpenAIStreamChunkWithToolCallDelta(model string, argumentsDelta string, toolIndex int) []byte {
|
||||||
|
toolCall := map[string]interface{}{
|
||||||
|
"index": toolIndex,
|
||||||
|
"function": map[string]interface{}{
|
||||||
|
"arguments": argumentsDelta,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
delta := map[string]interface{}{
|
||||||
|
"tool_calls": []map[string]interface{}{toolCall},
|
||||||
|
}
|
||||||
|
|
||||||
|
choice := map[string]interface{}{
|
||||||
|
"index": 0,
|
||||||
|
"delta": delta,
|
||||||
|
"finish_reason": nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk := map[string]interface{}{
|
||||||
|
"id": "chatcmpl-" + uuid.New().String()[:12],
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": time.Now().Unix(),
|
||||||
|
"model": model,
|
||||||
|
"choices": []map[string]interface{}{choice},
|
||||||
|
}
|
||||||
|
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAIStreamDoneChunk creates the final [DONE] stream event.
// NOTE(review): unlike BuildOpenAISSEDone in kiro_openai_stream.go (which
// returns bare "[DONE]" and documents that the handler layer adds the SSE
// "data:" prefix), this variant bakes the "data: " prefix in itself —
// confirm its callers do not add a second prefix.
func BuildOpenAIStreamDoneChunk() []byte {
	return []byte("data: [DONE]")
}
|
||||||
|
|
||||||
|
// BuildOpenAIStreamFinishChunk creates the final chunk with finish_reason
|
||||||
|
func BuildOpenAIStreamFinishChunk(model string, finishReason string) []byte {
|
||||||
|
choice := map[string]interface{}{
|
||||||
|
"index": 0,
|
||||||
|
"delta": map[string]interface{}{},
|
||||||
|
"finish_reason": finishReason,
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk := map[string]interface{}{
|
||||||
|
"id": "chatcmpl-" + uuid.New().String()[:12],
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": time.Now().Unix(),
|
||||||
|
"model": model,
|
||||||
|
"choices": []map[string]interface{}{choice},
|
||||||
|
}
|
||||||
|
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAIStreamUsageChunk creates a chunk with usage information (optional, for stream_options.include_usage)
|
||||||
|
func BuildOpenAIStreamUsageChunk(model string, usageInfo usage.Detail) []byte {
|
||||||
|
chunk := map[string]interface{}{
|
||||||
|
"id": "chatcmpl-" + uuid.New().String()[:12],
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": time.Now().Unix(),
|
||||||
|
"model": model,
|
||||||
|
"choices": []map[string]interface{}{},
|
||||||
|
"usage": map[string]interface{}{
|
||||||
|
"prompt_tokens": usageInfo.InputTokens,
|
||||||
|
"completion_tokens": usageInfo.OutputTokens,
|
||||||
|
"total_tokens": usageInfo.InputTokens + usageInfo.OutputTokens,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// GenerateToolCallID generates a unique tool call ID in OpenAI format
|
||||||
|
func GenerateToolCallID(toolName string) string {
|
||||||
|
return fmt.Sprintf("call_%s_%d_%d", toolName[:min(8, len(toolName))], time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1))
|
||||||
|
}
|
||||||
|
|
||||||
|
// min returns the minimum of two integers.
// (Kept for compatibility with pre-1.21 toolchains; shadows the builtin.)
func min(a, b int) int {
	if a > b {
		return b
	}
	return a
}
|
||||||
212
internal/translator/kiro/openai/kiro_openai_stream.go
Normal file
212
internal/translator/kiro/openai/kiro_openai_stream.go
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
// Package openai provides streaming SSE event building for OpenAI format.
|
||||||
|
// This package handles the construction of OpenAI-compatible Server-Sent Events (SSE)
|
||||||
|
// for streaming responses from Kiro API.
|
||||||
|
package openai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OpenAIStreamState tracks the state of streaming response conversion.
// One instance lives for the duration of a single streamed response; the
// ResponseID and Created values are stamped into every chunk so the whole
// stream shares one identity.
type OpenAIStreamState struct {
	ChunkIndex        int    // count of chunks emitted so far (incremented by the Build* helpers)
	ToolCallIndex     int    // index assigned to a tool call when one starts
	HasSentFirstChunk bool   // whether the role-bearing first chunk has been emitted
	Model             string // model name echoed into every chunk
	ResponseID        string // shared "chatcmpl-..." id for all chunks of this stream
	Created           int64  // shared unix creation timestamp for all chunks
}
|
||||||
|
|
||||||
|
// NewOpenAIStreamState creates a new stream state for tracking
|
||||||
|
func NewOpenAIStreamState(model string) *OpenAIStreamState {
|
||||||
|
return &OpenAIStreamState{
|
||||||
|
ChunkIndex: 0,
|
||||||
|
ToolCallIndex: 0,
|
||||||
|
HasSentFirstChunk: false,
|
||||||
|
Model: model,
|
||||||
|
ResponseID: "chatcmpl-" + uuid.New().String()[:24],
|
||||||
|
Created: time.Now().Unix(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FormatSSEEvent formats a JSON payload for SSE streaming.
// It deliberately returns the raw JSON without a "data:" prefix: the Handler
// layer (e.g. openai_handlers.go) adds the SSE prefix, which keeps prefixing
// in one place and avoids double-prefix bugs.
func FormatSSEEvent(data []byte) string {
	payload := string(data)
	return payload
}
|
||||||
|
|
||||||
|
// BuildOpenAISSETextDelta creates an SSE event for text content delta
|
||||||
|
func BuildOpenAISSETextDelta(state *OpenAIStreamState, textDelta string) string {
|
||||||
|
delta := map[string]interface{}{
|
||||||
|
"content": textDelta,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Include role in first chunk
|
||||||
|
if !state.HasSentFirstChunk {
|
||||||
|
delta["role"] = "assistant"
|
||||||
|
state.HasSentFirstChunk = true
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk := buildBaseChunk(state, delta, nil)
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
state.ChunkIndex++
|
||||||
|
return FormatSSEEvent(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAISSEToolCallStart creates an SSE event for tool call start
|
||||||
|
func BuildOpenAISSEToolCallStart(state *OpenAIStreamState, toolUseID, toolName string) string {
|
||||||
|
toolCall := map[string]interface{}{
|
||||||
|
"index": state.ToolCallIndex,
|
||||||
|
"id": toolUseID,
|
||||||
|
"type": "function",
|
||||||
|
"function": map[string]interface{}{
|
||||||
|
"name": toolName,
|
||||||
|
"arguments": "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
delta := map[string]interface{}{
|
||||||
|
"tool_calls": []map[string]interface{}{toolCall},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Include role in first chunk if not sent yet
|
||||||
|
if !state.HasSentFirstChunk {
|
||||||
|
delta["role"] = "assistant"
|
||||||
|
state.HasSentFirstChunk = true
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk := buildBaseChunk(state, delta, nil)
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
state.ChunkIndex++
|
||||||
|
return FormatSSEEvent(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAISSEToolCallArgumentsDelta creates an SSE event for tool call arguments delta
|
||||||
|
func BuildOpenAISSEToolCallArgumentsDelta(state *OpenAIStreamState, argumentsDelta string, toolIndex int) string {
|
||||||
|
toolCall := map[string]interface{}{
|
||||||
|
"index": toolIndex,
|
||||||
|
"function": map[string]interface{}{
|
||||||
|
"arguments": argumentsDelta,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
delta := map[string]interface{}{
|
||||||
|
"tool_calls": []map[string]interface{}{toolCall},
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk := buildBaseChunk(state, delta, nil)
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
state.ChunkIndex++
|
||||||
|
return FormatSSEEvent(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAISSEFinish creates an SSE event with finish_reason
|
||||||
|
func BuildOpenAISSEFinish(state *OpenAIStreamState, finishReason string) string {
|
||||||
|
chunk := buildBaseChunk(state, map[string]interface{}{}, &finishReason)
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
state.ChunkIndex++
|
||||||
|
return FormatSSEEvent(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAISSEUsage creates an SSE event with usage information
|
||||||
|
func BuildOpenAISSEUsage(state *OpenAIStreamState, usageInfo usage.Detail) string {
|
||||||
|
chunk := map[string]interface{}{
|
||||||
|
"id": state.ResponseID,
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": state.Created,
|
||||||
|
"model": state.Model,
|
||||||
|
"choices": []map[string]interface{}{},
|
||||||
|
"usage": map[string]interface{}{
|
||||||
|
"prompt_tokens": usageInfo.InputTokens,
|
||||||
|
"completion_tokens": usageInfo.OutputTokens,
|
||||||
|
"total_tokens": usageInfo.InputTokens + usageInfo.OutputTokens,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
return FormatSSEEvent(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAISSEDone creates the final [DONE] SSE event.
// It returns bare "[DONE]" with no "data:" prefix: the Handler layer
// (e.g. openai_handlers.go) adds the SSE prefix, keeping prefixing in one
// place and avoiding double-prefix issues.
func BuildOpenAISSEDone() string {
	const done = "[DONE]"
	return done
}
|
||||||
|
|
||||||
|
// buildBaseChunk creates a base chunk structure for streaming
|
||||||
|
func buildBaseChunk(state *OpenAIStreamState, delta map[string]interface{}, finishReason *string) map[string]interface{} {
|
||||||
|
choice := map[string]interface{}{
|
||||||
|
"index": 0,
|
||||||
|
"delta": delta,
|
||||||
|
}
|
||||||
|
|
||||||
|
if finishReason != nil {
|
||||||
|
choice["finish_reason"] = *finishReason
|
||||||
|
} else {
|
||||||
|
choice["finish_reason"] = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return map[string]interface{}{
|
||||||
|
"id": state.ResponseID,
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": state.Created,
|
||||||
|
"model": state.Model,
|
||||||
|
"choices": []map[string]interface{}{choice},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAISSEReasoningDelta creates an SSE event for reasoning content delta
|
||||||
|
// This is used for o1/o3 style models that expose reasoning tokens
|
||||||
|
func BuildOpenAISSEReasoningDelta(state *OpenAIStreamState, reasoningDelta string) string {
|
||||||
|
delta := map[string]interface{}{
|
||||||
|
"reasoning_content": reasoningDelta,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Include role in first chunk
|
||||||
|
if !state.HasSentFirstChunk {
|
||||||
|
delta["role"] = "assistant"
|
||||||
|
state.HasSentFirstChunk = true
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk := buildBaseChunk(state, delta, nil)
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
state.ChunkIndex++
|
||||||
|
return FormatSSEEvent(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildOpenAISSEFirstChunk creates the first chunk with role only
|
||||||
|
func BuildOpenAISSEFirstChunk(state *OpenAIStreamState) string {
|
||||||
|
delta := map[string]interface{}{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "",
|
||||||
|
}
|
||||||
|
|
||||||
|
state.HasSentFirstChunk = true
|
||||||
|
chunk := buildBaseChunk(state, delta, nil)
|
||||||
|
result, _ := json.Marshal(chunk)
|
||||||
|
state.ChunkIndex++
|
||||||
|
return FormatSSEEvent(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThinkingTagState tracks state for thinking tag detection in streaming.
type ThinkingTagState struct {
	InThinkingBlock   bool // whether the cursor is currently inside a thinking block
	PendingStartChars int  // partially matched characters of an opening tag
	PendingEndChars   int  // partially matched characters of a closing tag
}

// NewThinkingTagState creates a new thinking tag state.
// The zero value is the correct initial state: outside any thinking block,
// with no partially matched tag characters.
func NewThinkingTagState() *ThinkingTagState {
	return new(ThinkingTagState)
}
|
||||||
Reference in New Issue
Block a user