diff --git a/internal/runtime/executor/kiro_executor.go b/internal/runtime/executor/kiro_executor.go
index a8237cd7..3d1e2d51 100644
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -2551,6 +2551,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 	isThinkingBlockOpen := false // Track if thinking content block SSE event is open
 	thinkingBlockIndex := -1 // Index of the thinking content block
 	var accumulatedThinkingContent strings.Builder // Accumulate thinking content for token counting
+	hasOfficialReasoningEvent := false // Disable tag parsing after official reasoning events appear
 
 	// Buffer for handling partial tag matches at chunk boundaries
 	var pendingContent strings.Builder // Buffer content that might be part of a tag
@@ -2986,6 +2987,33 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 					lastUsageUpdateTime = time.Now()
 				}
 
+				if hasOfficialReasoningEvent {
+					// Strip stray thinking tags only. Whitespace is kept because deltas are
+					// stream fragments: trimming would glue words together across chunks.
+					processText := strings.ReplaceAll(strings.ReplaceAll(contentDelta, kirocommon.ThinkingStartTag, ""), kirocommon.ThinkingEndTag, "")
+					if processText != "" {
+						if !isTextBlockOpen {
+							contentBlockIndex++
+							isTextBlockOpen = true
+							blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
+							sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+							for _, chunk := range sseData {
+								if chunk != "" {
+									out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+								}
+							}
+						}
+						claudeEvent := kiroclaude.BuildClaudeStreamEvent(processText, contentBlockIndex)
+						sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
+						for _, chunk := range sseData {
+							if chunk != "" {
+								out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+							}
+						}
+					}
+					continue
+				}
+
 				// TAG-BASED THINKING PARSING: Parse tags from content
 				// Combine pending content with new content for processing
 				pendingContent.WriteString(contentDelta)
@@ -3264,6 +3292,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 				}
 
 				if thinkingText != "" {
+					hasOfficialReasoningEvent = true
 					// Close text block if open before starting thinking block
 					if isTextBlockOpen && contentBlockIndex >= 0 {
 						blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex)
diff --git a/internal/translator/kiro/claude/kiro_claude_request.go b/internal/translator/kiro/claude/kiro_claude_request.go
index 7012e644..0ad090ae 100644
--- a/internal/translator/kiro/claude/kiro_claude_request.go
+++ b/internal/translator/kiro/claude/kiro_claude_request.go
@@ -243,13 +243,11 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
 	// Process messages and build history
 	history, currentUserMsg, currentToolResults := processMessages(messages, modelID, origin)
 
-	// Build content with system prompt (only on first turn to avoid re-injection)
+	// Build content with system prompt.
+	// Keep thinking tags on subsequent turns so multi-turn Claude sessions
+	// continue to emit reasoning events.
 	if currentUserMsg != nil {
-		effectiveSystemPrompt := systemPrompt
-		if len(history) > 0 {
-			effectiveSystemPrompt = "" // Don't re-inject on subsequent turns
-		}
-		currentUserMsg.Content = buildFinalContent(currentUserMsg.Content, effectiveSystemPrompt, currentToolResults)
+		currentUserMsg.Content = buildFinalContent(currentUserMsg.Content, systemPrompt, currentToolResults)
 
 		// Deduplicate currentToolResults
 		currentToolResults = deduplicateToolResults(currentToolResults)
@@ -475,6 +473,15 @@ func IsThinkingEnabledWithHeaders(body []byte, headers http.Header) bool {
 		}
 	}
 
+	// Check model name directly for thinking hints.
+	// This enables thinking variants even when clients don't send explicit thinking fields.
+	model := strings.TrimSpace(gjson.GetBytes(body, "model").String())
+	modelLower := strings.ToLower(model)
+	if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") {
+		log.Debugf("kiro: thinking mode enabled via model name hint: %s", model)
+		return true
+	}
+
 	log.Debugf("kiro: IsThinkingEnabled returning false (no thinking mode detected)")
 	return false
 }
diff --git a/internal/translator/kiro/openai/kiro_openai_request.go b/internal/translator/kiro/openai/kiro_openai_request.go
index 9515848f..474231b3 100644
--- a/internal/translator/kiro/openai/kiro_openai_request.go
+++ b/internal/translator/kiro/openai/kiro_openai_request.go
@@ -234,16 +234,16 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
 	// Kiro API supports official thinking/reasoning mode via tag.
 	// When set to "enabled", Kiro returns reasoning content as official reasoningContentEvent
 	// rather than inline tags in assistantResponseEvent.
-	// We use a high max_thinking_length to allow extensive reasoning.
+	// Use a conservative thinking budget to reduce latency/cost spikes in long sessions.
 	if thinkingEnabled {
 		thinkingHint := `enabled
-200000`
+16000`
 		if systemPrompt != "" {
 			systemPrompt = thinkingHint + "\n\n" + systemPrompt
 		} else {
 			systemPrompt = thinkingHint
 		}
-		log.Debugf("kiro-openai: injected thinking prompt (official mode)")
+		log.Infof("kiro-openai: injected thinking prompt (official mode), has_tools: %v", len(kiroTools) > 0)
 	}
 
 	// Process messages and build history
@@ -831,7 +831,6 @@ func hasThinkingTagInBody(body []byte) bool {
 	return strings.Contains(bodyStr, "") || strings.Contains(bodyStr, "")
 }
 
-
 // extractToolChoiceHint extracts tool_choice from OpenAI request and returns a system prompt hint.
 // OpenAI tool_choice values:
 // - "none": Don't use any tools