feat(kiro): enhance thinking support and fix truncation issues

- **Thinking Support**:
    - Enabled thinking support for all Kiro Claude models, including Haiku 4.5 and agentic variants.
    - Updated `model_definitions.go` with thinking configuration (Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true).
    - Fixed `extended_thinking` field names in `model_registry.go` (from `min_budget`/`max_budget` to `min`/`max`) to comply with Claude API specs, enabling thinking control in clients like Claude Code.

- **Kiro Executor Fixes**:
    - Fixed `budget_tokens` handling: explicitly disable thinking when budget is 0 or negative.
    - Removed aggressive duplicate content filtering logic that caused truncation/data loss.
    - Enhanced thinking tag parsing with `extractThinkingFromContent` to correctly handle interleaved thinking/text blocks.
    - Added EOF handling to flush pending thinking tag characters, preventing data loss at stream end.

- **Performance**:
    - Optimized Claude stream handler (v6.2) with reduced buffer size (4KB) and faster flush interval (50ms) to minimize latency and prevent timeouts.
This commit is contained in:
Ravens2121
2025-12-13 03:51:14 +08:00
parent 05b499fb83
commit db80b20bc2
4 changed files with 219 additions and 28 deletions

View File

@@ -895,6 +895,7 @@ func GetKiroModels() []*ModelInfo {
Description: "Claude Opus 4.5 via Kiro (2.2x credit)",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "kiro-claude-sonnet-4-5",
@@ -906,6 +907,7 @@ func GetKiroModels() []*ModelInfo {
Description: "Claude Sonnet 4.5 via Kiro (1.3x credit)",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "kiro-claude-sonnet-4",
@@ -917,6 +919,7 @@ func GetKiroModels() []*ModelInfo {
Description: "Claude Sonnet 4 via Kiro (1.3x credit)",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "kiro-claude-haiku-4-5",
@@ -928,6 +931,7 @@ func GetKiroModels() []*ModelInfo {
Description: "Claude Haiku 4.5 via Kiro (0.4x credit)",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
},
// --- Agentic Variants (Optimized for coding agents with chunked writes) ---
{
@@ -940,6 +944,7 @@ func GetKiroModels() []*ModelInfo {
Description: "Claude Opus 4.5 optimized for coding agents (chunked writes)",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "kiro-claude-sonnet-4-5-agentic",
@@ -951,6 +956,7 @@ func GetKiroModels() []*ModelInfo {
Description: "Claude Sonnet 4.5 optimized for coding agents (chunked writes)",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "kiro-claude-sonnet-4-agentic",
@@ -962,6 +968,7 @@ func GetKiroModels() []*ModelInfo {
Description: "Claude Sonnet 4 optimized for coding agents (chunked writes)",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "kiro-claude-haiku-4-5-agentic",
@@ -973,6 +980,7 @@ func GetKiroModels() []*ModelInfo {
Description: "Claude Haiku 4.5 optimized for coding agents (chunked writes)",
ContextLength: 200000,
MaxCompletionTokens: 64000,
Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
},
}
}

View File

@@ -748,7 +748,8 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
}
return result
case "claude":
case "claude", "kiro", "antigravity":
// Claude, Kiro, and Antigravity all use Claude-compatible format for Claude Code client
result := map[string]any{
"id": model.ID,
"object": "model",
@@ -763,6 +764,19 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
if model.DisplayName != "" {
result["display_name"] = model.DisplayName
}
// Add thinking support for Claude Code client
// Claude Code checks for "thinking" field (simple boolean) to enable tab toggle
// Also add "extended_thinking" for detailed budget info
if model.Thinking != nil {
result["thinking"] = true
result["extended_thinking"] = map[string]any{
"supported": true,
"min": model.Thinking.Min,
"max": model.Thinking.Max,
"zero_allowed": model.Thinking.ZeroAllowed,
"dynamic_allowed": model.Thinking.DynamicAllowed,
}
}
return result
case "gemini":

View File

@@ -1118,10 +1118,18 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
// Read budget_tokens if specified - this value comes from:
// - Claude API: thinking.budget_tokens directly
// - OpenAI API: reasoning_effort -> budget_tokens (low:4000, medium:16000, high:32000)
if bt := thinkingField.Get("budget_tokens"); bt.Exists() && bt.Int() > 0 {
if bt := thinkingField.Get("budget_tokens"); bt.Exists() {
budgetTokens = bt.Int()
// If budget_tokens <= 0, disable thinking explicitly
// This allows users to disable thinking by setting budget_tokens to 0
if budgetTokens <= 0 {
thinkingEnabled = false
log.Debugf("kiro: thinking mode disabled via budget_tokens <= 0")
}
}
if thinkingEnabled {
log.Debugf("kiro: thinking mode enabled via Claude API parameter, budget_tokens: %d", budgetTokens)
}
log.Debugf("kiro: thinking mode enabled via Claude API parameter, budget_tokens: %d", budgetTokens)
}
}
@@ -1737,15 +1745,23 @@ func getString(m map[string]interface{}, key string) string {
// buildClaudeResponse constructs a Claude-compatible response.
// Supports tool_use blocks when tools are present in the response.
// Supports thinking blocks - parses <thinking> tags and converts to Claude thinking content blocks.
func (e *KiroExecutor) buildClaudeResponse(content string, toolUses []kiroToolUse, model string, usageInfo usage.Detail) []byte {
var contentBlocks []map[string]interface{}
// Add text content if present
// Extract thinking blocks and text from content
// This handles <thinking>...</thinking> tags from Kiro's response
if content != "" {
contentBlocks = append(contentBlocks, map[string]interface{}{
"type": "text",
"text": content,
})
blocks := e.extractThinkingFromContent(content)
contentBlocks = append(contentBlocks, blocks...)
// DIAGNOSTIC: Log if thinking blocks were extracted
for _, block := range blocks {
if block["type"] == "thinking" {
thinkingContent := block["thinking"].(string)
log.Infof("kiro: buildClaudeResponse extracted thinking block (len: %d)", len(thinkingContent))
}
}
}
// Add tool_use blocks
@@ -1788,6 +1804,101 @@ func (e *KiroExecutor) buildClaudeResponse(content string, toolUses []kiroToolUs
return result
}
// extractThinkingFromContent converts a complete response string into an ordered
// list of Claude content blocks. Spans wrapped in thinkingStartTag/thinkingEndTag
// become "thinking" blocks and everything else becomes "text" blocks, so
// interleaved thinking and text keep the order in which they appear.
//
// Segments that are empty or whitespace-only are skipped. An opening tag with no
// matching closing tag turns the rest of the content into a thinking block and
// logs a warning. Empty content yields a nil slice; content that contains tags
// but nothing except whitespace yields a single empty text block.
func (e *KiroExecutor) extractThinkingFromContent(content string) []map[string]interface{} {
	if content == "" {
		return nil
	}

	// Fast path: without an opening tag the whole content is one text block,
	// kept verbatim even when it consists only of whitespace.
	if !strings.Contains(content, thinkingStartTag) {
		return []map[string]interface{}{{"type": "text", "text": content}}
	}

	log.Debugf("kiro: extractThinkingFromContent - found thinking tags in content (len: %d)", len(content))

	var blocks []map[string]interface{}

	// push appends a block of the given kind unless its payload is blank, and
	// reports whether a block was added. The payload is stored under a key
	// equal to the block type ("text" or "thinking").
	push := func(kind, payload string) bool {
		if strings.TrimSpace(payload) == "" {
			return false
		}
		blocks = append(blocks, map[string]interface{}{"type": kind, kind: payload})
		return true
	}

	for rest := content; rest != ""; {
		leading, afterOpen, opened := strings.Cut(rest, thinkingStartTag)
		if !opened {
			// No further thinking tags: whatever is left is trailing text.
			push("text", rest)
			break
		}
		push("text", leading)

		thought, afterClose, closed := strings.Cut(afterOpen, thinkingEndTag)
		if !closed {
			// Unterminated tag (incomplete response): the remainder is thinking.
			if push("thinking", afterOpen) {
				log.Warnf("kiro: extractThinkingFromContent - missing closing </thinking> tag")
			}
			break
		}
		if push("thinking", thought) {
			log.Debugf("kiro: extractThinkingFromContent - extracted thinking block (len: %d)", len(thought))
		}

		// Continue scanning after the closing tag.
		rest = afterClose
	}

	// Every segment was blank: fall back to a single empty text block.
	if len(blocks) == 0 {
		blocks = append(blocks, map[string]interface{}{"type": "text", "text": ""})
	}
	return blocks
}
// NOTE: Tool uses are now extracted from API response, not parsed from text
@@ -1804,9 +1915,10 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
processedIDs := make(map[string]bool)
var currentToolUse *toolUseState
// Duplicate content detection - tracks last content event to filter duplicates
// Based on AIClient-2-API implementation for Kiro
var lastContentEvent string
// NOTE: Duplicate content filtering removed - it was causing legitimate repeated
// content (like consecutive newlines) to be incorrectly filtered out.
// The previous implementation compared lastContentEvent == contentDelta which
// is too aggressive for streaming scenarios.
// Streaming token calculation - accumulate content for real-time token counting
// Based on AIClient-2-API implementation
@@ -1905,6 +2017,56 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
hasToolUses = true
currentToolUse = nil
}
// Flush any pending tag characters at EOF
// These are partial tag prefixes that were held back waiting for more data
// Since no more data is coming, output them as regular text
var pendingText string
if pendingStartTagChars > 0 {
pendingText = thinkingStartTag[:pendingStartTagChars]
log.Debugf("kiro: flushing pending start tag chars at EOF: %q", pendingText)
pendingStartTagChars = 0
}
if pendingEndTagChars > 0 {
pendingText += thinkingEndTag[:pendingEndTagChars]
log.Debugf("kiro: flushing pending end tag chars at EOF: %q", pendingText)
pendingEndTagChars = 0
}
// Output pending text if any
if pendingText != "" {
// If we're in a thinking block, output as thinking content
if inThinkBlock && isThinkingBlockOpen {
thinkingEvent := e.buildClaudeThinkingDeltaEvent(pendingText, thinkingBlockIndex)
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, thinkingEvent, &translatorParam)
for _, chunk := range sseData {
if chunk != "" {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
} else {
// Output as regular text
if !isTextBlockOpen {
contentBlockIndex++
isTextBlockOpen = true
blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
for _, chunk := range sseData {
if chunk != "" {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
}
claudeEvent := e.buildClaudeStreamEvent(pendingText, contentBlockIndex)
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
for _, chunk := range sseData {
if chunk != "" {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
}
}
break
}
if err != nil {
@@ -2035,15 +2197,16 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
}
}
// Handle text content with duplicate detection and thinking mode support
// Handle text content with thinking mode support
if contentDelta != "" {
// Check for duplicate content - skip if identical to last content event
// Based on AIClient-2-API implementation for Kiro
if contentDelta == lastContentEvent {
log.Debugf("kiro: skipping duplicate content event (len: %d)", len(contentDelta))
continue
// DIAGNOSTIC: Check for thinking tags in response
if strings.Contains(contentDelta, "<thinking>") || strings.Contains(contentDelta, "</thinking>") {
log.Infof("kiro: DIAGNOSTIC - Found thinking tag in response (len: %d)", len(contentDelta))
}
lastContentEvent = contentDelta
// NOTE: Duplicate content filtering was removed because it incorrectly
// filtered out legitimate repeated content (like consecutive newlines "\n\n").
// Streaming naturally can have identical chunks that are valid content.
outputLen += len(contentDelta)
// Accumulate content for streaming token calculation