mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-03-09 15:25:17 +00:00
Merge pull request #252 from DragonBaiMo/fix/kiro-thinking-stream-dedup
fix(kiro): stop duplicated thinking on OpenAI and preserve Claude multi-turn thinking
This commit is contained in:
@@ -2551,6 +2551,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
isThinkingBlockOpen := false // Track if thinking content block SSE event is open
|
||||
thinkingBlockIndex := -1 // Index of the thinking content block
|
||||
var accumulatedThinkingContent strings.Builder // Accumulate thinking content for token counting
|
||||
hasOfficialReasoningEvent := false // Disable tag parsing after official reasoning events appear
|
||||
|
||||
// Buffer for handling partial tag matches at chunk boundaries
|
||||
var pendingContent strings.Builder // Buffer content that might be part of a tag
|
||||
@@ -2986,6 +2987,31 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
lastUsageUpdateTime = time.Now()
|
||||
}
|
||||
|
||||
if hasOfficialReasoningEvent {
|
||||
processText := strings.TrimSpace(strings.ReplaceAll(strings.ReplaceAll(contentDelta, kirocommon.ThinkingStartTag, ""), kirocommon.ThinkingEndTag, ""))
|
||||
if processText != "" {
|
||||
if !isTextBlockOpen {
|
||||
contentBlockIndex++
|
||||
isTextBlockOpen = true
|
||||
blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "")
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
}
|
||||
claudeEvent := kiroclaude.BuildClaudeStreamEvent(processText, contentBlockIndex)
|
||||
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam)
|
||||
for _, chunk := range sseData {
|
||||
if chunk != "" {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
|
||||
}
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// TAG-BASED THINKING PARSING: Parse <thinking> tags from content
|
||||
// Combine pending content with new content for processing
|
||||
pendingContent.WriteString(contentDelta)
|
||||
@@ -3264,6 +3290,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
|
||||
}
|
||||
|
||||
if thinkingText != "" {
|
||||
hasOfficialReasoningEvent = true
|
||||
// Close text block if open before starting thinking block
|
||||
if isTextBlockOpen && contentBlockIndex >= 0 {
|
||||
blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex)
|
||||
|
||||
@@ -243,13 +243,11 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
|
||||
// Process messages and build history
|
||||
history, currentUserMsg, currentToolResults := processMessages(messages, modelID, origin)
|
||||
|
||||
// Build content with system prompt (only on first turn to avoid re-injection)
|
||||
// Build content with system prompt.
|
||||
// Keep thinking tags on subsequent turns so multi-turn Claude sessions
|
||||
// continue to emit reasoning events.
|
||||
if currentUserMsg != nil {
|
||||
effectiveSystemPrompt := systemPrompt
|
||||
if len(history) > 0 {
|
||||
effectiveSystemPrompt = "" // Don't re-inject on subsequent turns
|
||||
}
|
||||
currentUserMsg.Content = buildFinalContent(currentUserMsg.Content, effectiveSystemPrompt, currentToolResults)
|
||||
currentUserMsg.Content = buildFinalContent(currentUserMsg.Content, systemPrompt, currentToolResults)
|
||||
|
||||
// Deduplicate currentToolResults
|
||||
currentToolResults = deduplicateToolResults(currentToolResults)
|
||||
@@ -475,6 +473,15 @@ func IsThinkingEnabledWithHeaders(body []byte, headers http.Header) bool {
|
||||
}
|
||||
}
|
||||
|
||||
// Check model name directly for thinking hints.
|
||||
// This enables thinking variants even when clients don't send explicit thinking fields.
|
||||
model := strings.TrimSpace(gjson.GetBytes(body, "model").String())
|
||||
modelLower := strings.ToLower(model)
|
||||
if strings.Contains(modelLower, "thinking") || strings.Contains(modelLower, "-reason") {
|
||||
log.Debugf("kiro: thinking mode enabled via model name hint: %s", model)
|
||||
return true
|
||||
}
|
||||
|
||||
log.Debugf("kiro: IsThinkingEnabled returning false (no thinking mode detected)")
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -234,16 +234,16 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
|
||||
// Kiro API supports official thinking/reasoning mode via <thinking_mode> tag.
|
||||
// When set to "enabled", Kiro returns reasoning content as official reasoningContentEvent
|
||||
// rather than inline <thinking> tags in assistantResponseEvent.
|
||||
// We use a high max_thinking_length to allow extensive reasoning.
|
||||
// Use a conservative thinking budget to reduce latency/cost spikes in long sessions.
|
||||
if thinkingEnabled {
|
||||
thinkingHint := `<thinking_mode>enabled</thinking_mode>
|
||||
<max_thinking_length>200000</max_thinking_length>`
|
||||
<max_thinking_length>16000</max_thinking_length>`
|
||||
if systemPrompt != "" {
|
||||
systemPrompt = thinkingHint + "\n\n" + systemPrompt
|
||||
} else {
|
||||
systemPrompt = thinkingHint
|
||||
}
|
||||
log.Debugf("kiro-openai: injected thinking prompt (official mode)")
|
||||
log.Infof("kiro-openai: injected thinking prompt (official mode), has_tools: %v", len(kiroTools) > 0)
|
||||
}
|
||||
|
||||
// Process messages and build history
|
||||
@@ -831,7 +831,6 @@ func hasThinkingTagInBody(body []byte) bool {
|
||||
return strings.Contains(bodyStr, "<thinking_mode>") || strings.Contains(bodyStr, "<max_thinking_length>")
|
||||
}
|
||||
|
||||
|
||||
// extractToolChoiceHint extracts tool_choice from OpenAI request and returns a system prompt hint.
|
||||
// OpenAI tool_choice values:
|
||||
// - "none": Don't use any tools
|
||||
|
||||
Reference in New Issue
Block a user