feat(kiro): enhance thinking support and fix truncation issues

- **Thinking Support**: - Enabled thinking support for all Kiro Claude models, including Haiku 4.5 and agentic variants. - Updated `model_definitions.go` with thinking configuration (Min: 1024, Max: 32000, ZeroAllowed: true). - Fixed `extended_thinking` field names in `model_registry.go` (from `min_budget`/`max_budget` to `min`/`max`) to comply with Claude API specs, enabling thinking control in clients like Claude Code. - **Kiro Executor Fixes**: - Fixed `budget_tokens` handling: explicitly disable thinking when budget is 0 or negative. - Removed aggressive duplicate content filtering logic that caused truncation/data loss. - Enhanced thinking tag parsing with `extractThinkingFromContent` to correctly handle interleaved thinking/text blocks. - Added EOF handling to flush pending thinking tag characters, preventing data loss at stream end. - **Performance**: - Optimized Claude stream handler (v6.2) with reduced buffer size (4KB) and faster flush interval (50ms) to minimize latency and prevent timeouts.
2026-04-24 08:40:32 +00:00 · 2025-12-13 03:51:14 +08:00
parent 05b499fb83
commit db80b20bc2
4 changed files with 219 additions and 28 deletions
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -895,6 +895,7 @@ func GetKiroModels() []*ModelInfo {
 			Description:         "Claude Opus 4.5 via Kiro (2.2x credit)",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID:                  "kiro-claude-sonnet-4-5",
@@ -906,6 +907,7 @@ func GetKiroModels() []*ModelInfo {
 			Description:         "Claude Sonnet 4.5 via Kiro (1.3x credit)",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID:                  "kiro-claude-sonnet-4",
@@ -917,6 +919,7 @@ func GetKiroModels() []*ModelInfo {
 			Description:         "Claude Sonnet 4 via Kiro (1.3x credit)",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID:                  "kiro-claude-haiku-4-5",
@@ -928,6 +931,7 @@ func GetKiroModels() []*ModelInfo {
 			Description:         "Claude Haiku 4.5 via Kiro (0.4x credit)",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		// --- Agentic Variants (Optimized for coding agents with chunked writes) ---
 		{
@@ -940,6 +944,7 @@ func GetKiroModels() []*ModelInfo {
 			Description:         "Claude Opus 4.5 optimized for coding agents (chunked writes)",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID:                  "kiro-claude-sonnet-4-5-agentic",
@@ -951,6 +956,7 @@ func GetKiroModels() []*ModelInfo {
 			Description:         "Claude Sonnet 4.5 optimized for coding agents (chunked writes)",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID:                  "kiro-claude-sonnet-4-agentic",
@@ -962,6 +968,7 @@ func GetKiroModels() []*ModelInfo {
 			Description:         "Claude Sonnet 4 optimized for coding agents (chunked writes)",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID:                  "kiro-claude-haiku-4-5-agentic",
@@ -973,6 +980,7 @@ func GetKiroModels() []*ModelInfo {
 			Description:         "Claude Haiku 4.5 optimized for coding agents (chunked writes)",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
 		},
 	}
 }
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -748,7 +748,8 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 		}
 		return result

-	case "claude":
+	case "claude", "kiro", "antigravity":
+		// Claude, Kiro, and Antigravity all use Claude-compatible format for Claude Code client
 		result := map[string]any{
 			"id":       model.ID,
 			"object":   "model",
@@ -763,6 +764,19 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 		if model.DisplayName != "" {
 			result["display_name"] = model.DisplayName
 		}
+		// Add thinking support for Claude Code client
+		// Claude Code checks for "thinking" field (simple boolean) to enable tab toggle
+		// Also add "extended_thinking" for detailed budget info
+		if model.Thinking != nil {
+			result["thinking"] = true
+			result["extended_thinking"] = map[string]any{
+				"supported":       true,
+				"min":             model.Thinking.Min,
+				"max":             model.Thinking.Max,
+				"zero_allowed":    model.Thinking.ZeroAllowed,
+				"dynamic_allowed": model.Thinking.DynamicAllowed,
+			}
+		}
 		return result

 	case "gemini":
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -1118,10 +1118,18 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
 			// Read budget_tokens if specified - this value comes from:
 			// - Claude API: thinking.budget_tokens directly
 			// - OpenAI API: reasoning_effort -> budget_tokens (low:4000, medium:16000, high:32000)
-			if bt := thinkingField.Get("budget_tokens"); bt.Exists() && bt.Int() > 0 {
+			if bt := thinkingField.Get("budget_tokens"); bt.Exists() {
 				budgetTokens = bt.Int()
+				// If budget_tokens <= 0, disable thinking explicitly
+				// This allows users to disable thinking by setting budget_tokens to 0
+				if budgetTokens <= 0 {
+					thinkingEnabled = false
+					log.Debugf("kiro: thinking mode disabled via budget_tokens <= 0")
+				}
+			}
+			if thinkingEnabled {
+				log.Debugf("kiro: thinking mode enabled via Claude API parameter, budget_tokens: %d", budgetTokens)
 			}
-			log.Debugf("kiro: thinking mode enabled via Claude API parameter, budget_tokens: %d", budgetTokens)
 		}
 	}

@@ -1737,15 +1745,23 @@ func getString(m map[string]interface{}, key string) string {

 // buildClaudeResponse constructs a Claude-compatible response.
 // Supports tool_use blocks when tools are present in the response.
+// Supports thinking blocks - parses <thinking> tags and converts to Claude thinking content blocks.
 func (e *KiroExecutor) buildClaudeResponse(content string, toolUses []kiroToolUse, model string, usageInfo usage.Detail) []byte {
 	var contentBlocks []map[string]interface{}

-	// Add text content if present
+	// Extract thinking blocks and text from content
+	// This handles <thinking>...</thinking> tags from Kiro's response
 	if content != "" {
-		contentBlocks = append(contentBlocks, map[string]interface{}{
-			"type": "text",
-			"text": content,
-		})
+		blocks := e.extractThinkingFromContent(content)
+		contentBlocks = append(contentBlocks, blocks...)
+		
+		// DIAGNOSTIC: Log if thinking blocks were extracted
+		for _, block := range blocks {
+			if block["type"] == "thinking" {
+				thinkingContent := block["thinking"].(string)
+				log.Infof("kiro: buildClaudeResponse extracted thinking block (len: %d)", len(thinkingContent))
+			}
+		}
 	}

 	// Add tool_use blocks
@@ -1788,6 +1804,101 @@ func (e *KiroExecutor) buildClaudeResponse(content string, toolUses []kiroToolUs
 	return result
 }

+// extractThinkingFromContent parses content to extract thinking blocks and text.
+// Returns a list of content blocks in the order they appear in the content.
+// Handles interleaved thinking and text blocks correctly.
+// Based on the streaming implementation's thinking tag handling.
+func (e *KiroExecutor) extractThinkingFromContent(content string) []map[string]interface{} {
+	var blocks []map[string]interface{}
+	
+	if content == "" {
+		return blocks
+	}
+	
+	// Check if content contains thinking tags at all
+	if !strings.Contains(content, thinkingStartTag) {
+		// No thinking tags, return as plain text
+		return []map[string]interface{}{
+			{
+				"type": "text",
+				"text": content,
+			},
+		}
+	}
+	
+	log.Debugf("kiro: extractThinkingFromContent - found thinking tags in content (len: %d)", len(content))
+	
+	remaining := content
+	
+	for len(remaining) > 0 {
+		// Look for <thinking> tag
+		startIdx := strings.Index(remaining, thinkingStartTag)
+		
+		if startIdx == -1 {
+			// No more thinking tags, add remaining as text
+			if strings.TrimSpace(remaining) != "" {
+				blocks = append(blocks, map[string]interface{}{
+					"type": "text",
+					"text": remaining,
+				})
+			}
+			break
+		}
+		
+		// Add text before thinking tag (if any meaningful content)
+		if startIdx > 0 {
+			textBefore := remaining[:startIdx]
+			if strings.TrimSpace(textBefore) != "" {
+				blocks = append(blocks, map[string]interface{}{
+					"type": "text",
+					"text": textBefore,
+				})
+			}
+		}
+		
+		// Move past the opening tag
+		remaining = remaining[startIdx+len(thinkingStartTag):]
+		
+		// Find closing tag
+		endIdx := strings.Index(remaining, thinkingEndTag)
+		
+		if endIdx == -1 {
+			// No closing tag found, treat rest as thinking content (incomplete response)
+			if strings.TrimSpace(remaining) != "" {
+				blocks = append(blocks, map[string]interface{}{
+					"type":     "thinking",
+					"thinking": remaining,
+				})
+				log.Warnf("kiro: extractThinkingFromContent - missing closing </thinking> tag")
+			}
+			break
+		}
+		
+		// Extract thinking content between tags
+		thinkContent := remaining[:endIdx]
+		if strings.TrimSpace(thinkContent) != "" {
+			blocks = append(blocks, map[string]interface{}{
+				"type":     "thinking",
+				"thinking": thinkContent,
+			})
+			log.Debugf("kiro: extractThinkingFromContent - extracted thinking block (len: %d)", len(thinkContent))
+		}
+		
+		// Move past the closing tag
+		remaining = remaining[endIdx+len(thinkingEndTag):]
+	}
+	
+	// If no blocks were created (all whitespace), return empty text block
+	if len(blocks) == 0 {
+		blocks = append(blocks, map[string]interface{}{
+			"type": "text",
+			"text": "",
+		})
+	}
+	
+	return blocks
+}
+
 // NOTE: Tool uses are now extracted from API response, not parsed from text


@@ -1804,9 +1915,10 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 	processedIDs := make(map[string]bool)
 	var currentToolUse *toolUseState

-	// Duplicate content detection - tracks last content event to filter duplicates
-	// Based on AIClient-2-API implementation for Kiro
-	var lastContentEvent string
+	// NOTE: Duplicate content filtering removed - it was causing legitimate repeated
+	// content (like consecutive newlines) to be incorrectly filtered out.
+	// The previous implementation compared lastContentEvent == contentDelta which
+	// is too aggressive for streaming scenarios.

 	// Streaming token calculation - accumulate content for real-time token counting
 	// Based on AIClient-2-API implementation
@@ -1905,6 +2017,56 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 				hasToolUses = true
 				currentToolUse = nil
 			}
+			
+			// Flush any pending tag characters at EOF
+			// These are partial tag prefixes that were held back waiting for more data
+			// Since no more data is coming, output them as regular text
+			var pendingText string
+			if pendingStartTagChars > 0 {
+				pendingText = thinkingStartTag[:pendingStartTagChars]
+				log.Debugf("kiro: flushing pending start tag chars at EOF: %q", pendingText)
+				pendingStartTagChars = 0
+			}
+			if pendingEndTagChars > 0 {
+				pendingText += thinkingEndTag[:pendingEndTagChars]
+				log.Debugf("kiro: flushing pending end tag chars at EOF: %q", pendingText)
+				pendingEndTagChars = 0
+			}
+			
+			// Output pending text if any
+			if pendingText != "" {
+				// If we're in a thinking block, output as thinking content
+				if inThinkBlock && isThinkingBlockOpen {
+					thinkingEvent := e.buildClaudeThinkingDeltaEvent(pendingText, thinkingBlockIndex)
+					sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, thinkingEvent, &translatorParam)
+					for _, chunk := range sseData {
+						if chunk != "" {
+							out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+						}
+					}
+				} else {
+					// Output as regular text
+					if !isTextBlockOpen {
+						contentBlockIndex++
+						isTextBlockOpen = true
+						blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
+						sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+						for _, chunk := range sseData {
+							if chunk != "" {
+								out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+							}
+						}
+					}
+					
+					claudeEvent := e.buildClaudeStreamEvent(pendingText, contentBlockIndex)
+					sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
+					for _, chunk := range sseData {
+						if chunk != "" {
+							out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+						}
+					}
+				}
+			}
 			break
 		}
 		if err != nil {
@@ -2035,15 +2197,16 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 				}
 			}

-			// Handle text content with duplicate detection and thinking mode support
+			// Handle text content with thinking mode support
 			if contentDelta != "" {
-				// Check for duplicate content - skip if identical to last content event
-				// Based on AIClient-2-API implementation for Kiro
-				if contentDelta == lastContentEvent {
-					log.Debugf("kiro: skipping duplicate content event (len: %d)", len(contentDelta))
-					continue
+				// DIAGNOSTIC: Check for thinking tags in response
+				if strings.Contains(contentDelta, "<thinking>") || strings.Contains(contentDelta, "</thinking>") {
+					log.Infof("kiro: DIAGNOSTIC - Found thinking tag in response (len: %d)", len(contentDelta))
 				}
-				lastContentEvent = contentDelta
+
+				// NOTE: Duplicate content filtering was removed because it incorrectly
+				// filtered out legitimate repeated content (like consecutive newlines "\n\n").
+				// Streaming naturally can have identical chunks that are valid content.

 				outputLen += len(contentDelta)
 				// Accumulate content for streaming token calculation