From 01cf2211671307f297ef9774b0c6dfd31f2f98b4 Mon Sep 17 00:00:00 2001 From: Ravens2121 Date: Sun, 14 Dec 2025 06:58:50 +0800 Subject: [PATCH] =?UTF-8?q?feat(kiro):=20=E4=BB=A3=E7=A0=81=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E9=87=8D=E6=9E=84=20+=20OpenAI=E7=BF=BB=E8=AF=91?= =?UTF-8?q?=E5=99=A8=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/api/modules/amp/response_rewriter.go | 60 +- internal/runtime/executor/kiro_executor.go | 3206 ++++------------- internal/translator/init.go | 2 +- internal/translator/kiro/claude/init.go | 5 +- .../translator/kiro/claude/kiro_claude.go | 18 +- .../kiro/claude/kiro_claude_request.go | 603 ++++ .../kiro/claude/kiro_claude_response.go | 184 + .../kiro/claude/kiro_claude_stream.go | 176 + .../kiro/claude/kiro_claude_tools.go | 522 +++ internal/translator/kiro/common/constants.go | 66 + .../translator/kiro/common/message_merge.go | 125 + internal/translator/kiro/common/utils.go | 16 + .../chat-completions/kiro_openai_request.go | 348 -- .../chat-completions/kiro_openai_response.go | 404 --- .../openai/{chat-completions => }/init.go | 13 +- .../translator/kiro/openai/kiro_openai.go | 368 ++ .../kiro/openai/kiro_openai_request.go | 604 ++++ .../kiro/openai/kiro_openai_response.go | 264 ++ .../kiro/openai/kiro_openai_stream.go | 207 ++ 19 files changed, 3898 insertions(+), 3293 deletions(-) create mode 100644 internal/translator/kiro/claude/kiro_claude_request.go create mode 100644 internal/translator/kiro/claude/kiro_claude_response.go create mode 100644 internal/translator/kiro/claude/kiro_claude_stream.go create mode 100644 internal/translator/kiro/claude/kiro_claude_tools.go create mode 100644 internal/translator/kiro/common/constants.go create mode 100644 internal/translator/kiro/common/message_merge.go create mode 100644 internal/translator/kiro/common/utils.go delete mode 100644 internal/translator/kiro/openai/chat-completions/kiro_openai_request.go delete mode 
100644 internal/translator/kiro/openai/chat-completions/kiro_openai_response.go rename internal/translator/kiro/openai/{chat-completions => }/init.go (56%) create mode 100644 internal/translator/kiro/openai/kiro_openai.go create mode 100644 internal/translator/kiro/openai/kiro_openai_request.go create mode 100644 internal/translator/kiro/openai/kiro_openai_response.go create mode 100644 internal/translator/kiro/openai/kiro_openai_stream.go diff --git a/internal/api/modules/amp/response_rewriter.go b/internal/api/modules/amp/response_rewriter.go index e906f143..d78af9f1 100644 --- a/internal/api/modules/amp/response_rewriter.go +++ b/internal/api/modules/amp/response_rewriter.go @@ -29,15 +29,71 @@ func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRe } } +const maxBufferedResponseBytes = 2 * 1024 * 1024 // 2MB safety cap + +func looksLikeSSEChunk(data []byte) bool { + // Fallback detection: some upstreams may omit/lie about Content-Type, causing SSE to be buffered. + // Heuristics are intentionally simple and cheap. + return bytes.Contains(data, []byte("data:")) || + bytes.Contains(data, []byte("event:")) || + bytes.Contains(data, []byte("message_start")) || + bytes.Contains(data, []byte("message_delta")) || + bytes.Contains(data, []byte("content_block_start")) || + bytes.Contains(data, []byte("content_block_delta")) || + bytes.Contains(data, []byte("content_block_stop")) || + bytes.Contains(data, []byte("\n\n")) +} + +func (rw *ResponseRewriter) enableStreaming(reason string) error { + if rw.isStreaming { + return nil + } + rw.isStreaming = true + + // Flush any previously buffered data to avoid reordering or data loss. + if rw.body != nil && rw.body.Len() > 0 { + buf := rw.body.Bytes() + // Copy before Reset() to keep bytes stable. 
+ toFlush := make([]byte, len(buf)) + copy(toFlush, buf) + rw.body.Reset() + + if _, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(toFlush)); err != nil { + return err + } + if flusher, ok := rw.ResponseWriter.(http.Flusher); ok { + flusher.Flush() + } + } + + log.Debugf("amp response rewriter: switched to streaming (%s)", reason) + return nil +} + // Write intercepts response writes and buffers them for model name replacement func (rw *ResponseRewriter) Write(data []byte) (int, error) { - // Detect streaming on first write - if rw.body.Len() == 0 && !rw.isStreaming { + // Detect streaming on first write (header-based) + if !rw.isStreaming && rw.body.Len() == 0 { contentType := rw.Header().Get("Content-Type") rw.isStreaming = strings.Contains(contentType, "text/event-stream") || strings.Contains(contentType, "stream") } + if !rw.isStreaming { + // Content-based fallback: detect SSE-like chunks even if Content-Type is missing/wrong. + if looksLikeSSEChunk(data) { + if err := rw.enableStreaming("sse heuristic"); err != nil { + return 0, err + } + } else if rw.body.Len()+len(data) > maxBufferedResponseBytes { + // Safety cap: avoid unbounded buffering on large responses. 
+ log.Warnf("amp response rewriter: buffer exceeded %d bytes, switching to streaming", maxBufferedResponseBytes) + if err := rw.enableStreaming("buffer limit"); err != nil { + return 0, err + } + } + } + if rw.isStreaming { return rw.ResponseWriter.Write(rw.rewriteStreamChunk(data)) } diff --git a/internal/runtime/executor/kiro_executor.go b/internal/runtime/executor/kiro_executor.go index cbc5443b..1d4d85a5 100644 --- a/internal/runtime/executor/kiro_executor.go +++ b/internal/runtime/executor/kiro_executor.go @@ -10,38 +10,34 @@ import ( "fmt" "io" "net/http" - "regexp" "strings" "sync" "time" - "unicode/utf8" "github.com/google/uuid" kiroauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + kiroclaude "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/claude" + kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" log "github.com/sirupsen/logrus" - "github.com/tidwall/gjson" - "github.com/gin-gonic/gin" ) const ( // Kiro API common constants - kiroContentType = "application/x-amz-json-1.0" - kiroAcceptStream = "*/*" - kiroMaxMessageSize = 10 * 1024 * 1024 // 10MB max message size for event stream - kiroMaxToolDescLen = 10237 // Kiro API limit is 10240 bytes, leave room for "..." 
+ kiroContentType = "application/x-amz-json-1.0" + kiroAcceptStream = "*/*" // Event Stream frame size constants for boundary protection // AWS Event Stream binary format: prelude (12 bytes) + headers + payload + message_crc (4 bytes) // Prelude consists of: total_length (4) + headers_length (4) + prelude_crc (4) - minEventStreamFrameSize = 16 // Minimum: 4(total_len) + 4(headers_len) + 4(prelude_crc) + 4(message_crc) - maxEventStreamMsgSize = 10 << 20 // Maximum message length: 10MB + minEventStreamFrameSize = 16 // Minimum: 4(total_len) + 4(headers_len) + 4(prelude_crc) + 4(message_crc) + maxEventStreamMsgSize = 10 << 20 // Maximum message length: 10MB // Event Stream error type constants ErrStreamFatal = "fatal" // Connection/authentication errors, not recoverable @@ -50,73 +46,13 @@ const ( kiroUserAgent = "aws-sdk-rust/1.3.9 os/macos lang/rust/1.87.0" // kiroFullUserAgent is the complete x-amz-user-agent header matching amq2api kiroFullUserAgent = "aws-sdk-rust/1.3.9 ua/2.1 api/ssooidc/1.88.0 os/macos lang/rust/1.87.0 m/E app/AmazonQ-For-CLI" - - // Thinking mode support - based on amq2api implementation - // These tags wrap reasoning content in the response stream - thinkingStartTag = "" - thinkingEndTag = "" - // thinkingHint is injected into the request to enable interleaved thinking mode - // This tells the model to use thinking tags and sets the max thinking length - thinkingHint = "interleaved16000" - - // kiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes. - // AWS Kiro API has a 2-3 minute timeout for large file write operations. - kiroAgenticSystemPrompt = ` -# CRITICAL: CHUNKED WRITE PROTOCOL (MANDATORY) - -You MUST follow these rules for ALL file operations. Violation causes server timeouts and task failure. 
- -## ABSOLUTE LIMITS -- **MAXIMUM 350 LINES** per single write/edit operation - NO EXCEPTIONS -- **RECOMMENDED 300 LINES** or less for optimal performance -- **NEVER** write entire files in one operation if >300 lines - -## MANDATORY CHUNKED WRITE STRATEGY - -### For NEW FILES (>300 lines total): -1. FIRST: Write initial chunk (first 250-300 lines) using write_to_file/fsWrite -2. THEN: Append remaining content in 250-300 line chunks using file append operations -3. REPEAT: Continue appending until complete - -### For EDITING EXISTING FILES: -1. Use surgical edits (apply_diff/targeted edits) - change ONLY what's needed -2. NEVER rewrite entire files - use incremental modifications -3. Split large refactors into multiple small, focused edits - -### For LARGE CODE GENERATION: -1. Generate in logical sections (imports, types, functions separately) -2. Write each section as a separate operation -3. Use append operations for subsequent sections - -## EXAMPLES OF CORRECT BEHAVIOR - -✅ CORRECT: Writing a 600-line file -- Operation 1: Write lines 1-300 (initial file creation) -- Operation 2: Append lines 301-600 - -✅ CORRECT: Editing multiple functions -- Operation 1: Edit function A -- Operation 2: Edit function B -- Operation 3: Edit function C - -❌ WRONG: Writing 500 lines in single operation → TIMEOUT -❌ WRONG: Rewriting entire file to change 5 lines → TIMEOUT -❌ WRONG: Generating massive code blocks without chunking → TIMEOUT - -## WHY THIS MATTERS -- Server has 2-3 minute timeout for operations -- Large writes exceed timeout and FAIL completely -- Chunked writes are FASTER and more RELIABLE -- Failed writes waste time and require retry - -REMEMBER: When in doubt, write LESS per operation. 
Multiple small operations > one large operation.` ) // Real-time usage estimation configuration // These control how often usage updates are sent during streaming var ( - usageUpdateCharThreshold = 5000 // Send usage update every 5000 characters - usageUpdateTimeInterval = 15 * time.Second // Or every 15 seconds, whichever comes first + usageUpdateCharThreshold = 5000 // Send usage update every 5000 characters + usageUpdateTimeInterval = 15 * time.Second // Or every 15 seconds, whichever comes first ) // kiroEndpointConfig bundles endpoint URL with its compatible Origin and AmzTarget values. @@ -186,7 +122,7 @@ func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig { } preference = strings.ToLower(strings.TrimSpace(preference)) - + // Create new slice to avoid modifying global state var sorted []kiroEndpointConfig var remaining []kiroEndpointConfig @@ -221,8 +157,8 @@ func getKiroEndpointConfigs(auth *cliproxyauth.Auth) []kiroEndpointConfig { // KiroExecutor handles requests to AWS CodeWhisperer (Kiro) API. type KiroExecutor struct { - cfg *config.Config - refreshMu sync.Mutex // Serializes token refresh operations to prevent race conditions + cfg *config.Config + refreshMu sync.Mutex // Serializes token refresh operations to prevent race conditions } // NewKiroExecutor creates a new Kiro executor instance. @@ -236,7 +172,6 @@ func (e *KiroExecutor) Identifier() string { return "kiro" } // PrepareRequest prepares the HTTP request before execution. func (e *KiroExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error { return nil } - // Execute sends the request to Kiro API and returns the response. // Supports automatic token refresh on 401/403 errors. 
func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { @@ -244,14 +179,6 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req if accessToken == "" { return resp, fmt.Errorf("kiro: access token not found in auth") } - if profileArn == "" { - // Only warn if not using builder-id auth (which doesn't need profileArn) - if auth == nil || auth.Metadata == nil { - log.Debugf("kiro: profile ARN not found in auth (may be normal for builder-id)") - } else if authMethod, ok := auth.Metadata["auth_method"].(string); !ok || authMethod != "builder-id" { - log.Warnf("kiro: profile ARN not found in auth, API calls may fail") - } - } reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) @@ -274,31 +201,14 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) kiroModelID := e.mapModelToKiro(req.Model) - - // Check if this is an agentic model variant - isAgentic := strings.HasSuffix(req.Model, "-agentic") - - // Check if this is a chat-only model variant (no tool calling) - isChatOnly := strings.HasSuffix(req.Model, "-chat") - - // Determine initial origin - always use AI_EDITOR to match AIClient-2-API behavior - // AIClient-2-API uses AI_EDITOR for all models, which is the Kiro IDE quota - // Note: CLI origin is for Amazon Q quota, but AIClient-2-API doesn't use it - currentOrigin := "AI_EDITOR" - - // Determine if profileArn should be included based on auth method - // profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO) - effectiveProfileArn := profileArn - if auth != nil && auth.Metadata != nil { - if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" { - effectiveProfileArn = "" 
// Don't include profileArn for builder-id auth - } - } - - kiroPayload := e.buildKiroPayload(body, kiroModelID, effectiveProfileArn, currentOrigin, isAgentic, isChatOnly) + + // Determine agentic mode and effective profile ARN using helper functions + isAgentic, isChatOnly := determineAgenticMode(req.Model) + effectiveProfileArn := getEffectiveProfileArnWithWarning(auth, profileArn) // Execute with retry on 401/403 and 429 (quota exhausted) - resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, kiroPayload, body, from, to, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly) + // Note: currentOrigin and kiroPayload are built inside executeWithRetry for each endpoint + resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, nil, body, from, to, reporter, "", kiroModelID, isAgentic, isChatOnly) return resp, err } @@ -311,247 +221,252 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. 
var resp cliproxyexecutor.Response maxRetries := 2 // Allow retries for token refresh + endpoint fallback endpointConfigs := getKiroEndpointConfigs(auth) + var last429Err error for endpointIdx := 0; endpointIdx < len(endpointConfigs); endpointIdx++ { endpointConfig := endpointConfigs[endpointIdx] url := endpointConfig.URL // Use this endpoint's compatible Origin (critical for avoiding 403 errors) currentOrigin = endpointConfig.Origin - + // Rebuild payload with the correct origin for this endpoint // Each endpoint requires its matching Origin value in the request body - kiroPayload = e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) - + kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + log.Debugf("kiro: trying endpoint %d/%d: %s (Name: %s, Origin: %s)", endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin) - for attempt := 0; attempt <= maxRetries; attempt++ { - httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(kiroPayload)) - if err != nil { - return resp, err - } - - httpReq.Header.Set("Content-Type", kiroContentType) - httpReq.Header.Set("Authorization", "Bearer "+accessToken) - httpReq.Header.Set("Accept", kiroAcceptStream) - // Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors) - httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget) - httpReq.Header.Set("User-Agent", kiroUserAgent) - httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent) - httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3") - httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String()) - - var attrs map[string]string - if auth != nil { - attrs = auth.Attributes - } - util.ApplyCustomHeadersFromAttrs(httpReq, attrs) - - var authID, authLabel, authType, authValue string - if auth != nil { - authID = auth.ID - authLabel = auth.Label - authType, authValue = auth.AccountInfo() - } - 
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ - URL: url, - Method: http.MethodPost, - Headers: httpReq.Header.Clone(), - Body: kiroPayload, - Provider: e.Identifier(), - AuthID: authID, - AuthLabel: authLabel, - AuthType: authType, - AuthValue: authValue, - }) - - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 120*time.Second) - httpResp, err := httpClient.Do(httpReq) - if err != nil { - recordAPIResponseError(ctx, e.cfg, err) - return resp, err - } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) - - // Handle 429 errors (quota exhausted) - try next endpoint - // Each endpoint has its own quota pool, so we can try different endpoints - if httpResp.StatusCode == 429 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) - - log.Warnf("kiro: %s endpoint quota exhausted (429), will try next endpoint", endpointConfig.Name) - - // Break inner retry loop to try next endpoint (which has different quota) - break - } - - // Handle 5xx server errors with exponential backoff retry - if httpResp.StatusCode >= 500 && httpResp.StatusCode < 600 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) - - if attempt < maxRetries { - // Exponential backoff: 1s, 2s, 4s... 
(max 30s) - backoff := time.Duration(1< 30*time.Second { - backoff = 30 * time.Second - } - log.Warnf("kiro: server error %d, retrying in %v (attempt %d/%d)", httpResp.StatusCode, backoff, attempt+1, maxRetries) - time.Sleep(backoff) - continue + for attempt := 0; attempt <= maxRetries; attempt++ { + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(kiroPayload)) + if err != nil { + return resp, err } - log.Errorf("kiro: server error %d after %d retries", httpResp.StatusCode, maxRetries) - return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } - // Handle 401 errors with token refresh and retry - // 401 = Unauthorized (token expired/invalid) - refresh token - if httpResp.StatusCode == 401 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) + httpReq.Header.Set("Content-Type", kiroContentType) + httpReq.Header.Set("Authorization", "Bearer "+accessToken) + httpReq.Header.Set("Accept", kiroAcceptStream) + // Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors) + httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget) + httpReq.Header.Set("User-Agent", kiroUserAgent) + httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent) + httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3") + httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String()) - if attempt < maxRetries { - log.Warnf("kiro: received 401 error, attempting token refresh and retry (attempt %d/%d)", attempt+1, maxRetries+1) + var attrs map[string]string + if auth != nil { + attrs = auth.Attributes + } + util.ApplyCustomHeadersFromAttrs(httpReq, attrs) - refreshedAuth, refreshErr := e.Refresh(ctx, auth) - if refreshErr != nil { - log.Errorf("kiro: token refresh failed: %v", refreshErr) - return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } + var authID, authLabel, authType, authValue string + if auth != nil { + authID = auth.ID + 
authLabel = auth.Label + authType, authValue = auth.AccountInfo() + } + recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + URL: url, + Method: http.MethodPost, + Headers: httpReq.Header.Clone(), + Body: kiroPayload, + Provider: e.Identifier(), + AuthID: authID, + AuthLabel: authLabel, + AuthType: authType, + AuthValue: authValue, + }) - if refreshedAuth != nil { - auth = refreshedAuth - accessToken, profileArn = kiroCredentials(auth) - // Rebuild payload with new profile ARN if changed - kiroPayload = e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) - log.Infof("kiro: token refreshed successfully, retrying request") + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 120*time.Second) + httpResp, err := httpClient.Do(httpReq) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return resp, err + } + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) + + // Handle 429 errors (quota exhausted) - try next endpoint + // Each endpoint has its own quota pool, so we can try different endpoints + if httpResp.StatusCode == 429 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) + + // Preserve last 429 so callers can correctly backoff when all endpoints are exhausted + last429Err = statusErr{code: httpResp.StatusCode, msg: string(respBody)} + + log.Warnf("kiro: %s endpoint quota exhausted (429), will try next endpoint, body: %s", + endpointConfig.Name, summarizeErrorBody(httpResp.Header.Get("Content-Type"), respBody)) + + // Break inner retry loop to try next endpoint (which has different quota) + break + } + + // Handle 5xx server errors with exponential backoff retry + if httpResp.StatusCode >= 500 && httpResp.StatusCode < 600 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) + + if attempt < maxRetries { + // Exponential backoff: 1s, 2s, 4s... 
(max 30s) + backoff := time.Duration(1< 30*time.Second { + backoff = 30 * time.Second + } + log.Warnf("kiro: server error %d, retrying in %v (attempt %d/%d)", httpResp.StatusCode, backoff, attempt+1, maxRetries) + time.Sleep(backoff) continue } + log.Errorf("kiro: server error %d after %d retries", httpResp.StatusCode, maxRetries) + return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} } - log.Warnf("kiro request error, status: 401, body: %s", summarizeErrorBody(httpResp.Header.Get("Content-Type"), respBody)) - return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } + // Handle 401 errors with token refresh and retry + // 401 = Unauthorized (token expired/invalid) - refresh token + if httpResp.StatusCode == 401 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) - // Handle 402 errors - Monthly Limit Reached - if httpResp.StatusCode == 402 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) + if attempt < maxRetries { + log.Warnf("kiro: received 401 error, attempting token refresh and retry (attempt %d/%d)", attempt+1, maxRetries+1) - log.Warnf("kiro: received 402 (monthly limit). 
Upstream body: %s", string(respBody)) + refreshedAuth, refreshErr := e.Refresh(ctx, auth) + if refreshErr != nil { + log.Errorf("kiro: token refresh failed: %v", refreshErr) + return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} + } - // Return upstream error body directly - return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } - - // Handle 403 errors - Access Denied / Token Expired - // Do NOT switch endpoints for 403 errors - if httpResp.StatusCode == 403 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) - - // Log the 403 error details for debugging - log.Warnf("kiro: received 403 error (attempt %d/%d), body: %s", attempt+1, maxRetries+1, summarizeErrorBody(httpResp.Header.Get("Content-Type"), respBody)) - - respBodyStr := string(respBody) - - // Check for SUSPENDED status - return immediately without retry - if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") { - log.Errorf("kiro: account is suspended, cannot proceed") - return resp, statusErr{code: httpResp.StatusCode, msg: "account suspended: " + string(respBody)} - } - - // Check if this looks like a token-related 403 (some APIs return 403 for expired tokens) - isTokenRelated := strings.Contains(respBodyStr, "token") || - strings.Contains(respBodyStr, "expired") || - strings.Contains(respBodyStr, "invalid") || - strings.Contains(respBodyStr, "unauthorized") - - if isTokenRelated && attempt < maxRetries { - log.Warnf("kiro: 403 appears token-related, attempting token refresh") - refreshedAuth, refreshErr := e.Refresh(ctx, auth) - if refreshErr != nil { - log.Errorf("kiro: token refresh failed: %v", refreshErr) - // Token refresh failed - return error immediately - return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } - if refreshedAuth != nil { - auth = refreshedAuth - accessToken, profileArn = kiroCredentials(auth) - 
kiroPayload = e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) - log.Infof("kiro: token refreshed for 403, retrying request") - continue + if refreshedAuth != nil { + auth = refreshedAuth + accessToken, profileArn = kiroCredentials(auth) + // Rebuild payload with new profile ARN if changed + kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + log.Infof("kiro: token refreshed successfully, retrying request") + continue + } } + + log.Warnf("kiro request error, status: 401, body: %s", summarizeErrorBody(httpResp.Header.Get("Content-Type"), respBody)) + return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} } - // For non-token 403 or after max retries, return error immediately + // Handle 402 errors - Monthly Limit Reached + if httpResp.StatusCode == 402 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) + + log.Warnf("kiro: received 402 (monthly limit). 
Upstream body: %s", string(respBody)) + + // Return upstream error body directly + return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} + } + + // Handle 403 errors - Access Denied / Token Expired // Do NOT switch endpoints for 403 errors - log.Warnf("kiro: 403 error, returning immediately (no endpoint switch)") - return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } + if httpResp.StatusCode == 403 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) - if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - log.Debugf("kiro request error, status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) - err = statusErr{code: httpResp.StatusCode, msg: string(b)} - if errClose := httpResp.Body.Close(); errClose != nil { - log.Errorf("response body close error: %v", errClose) - } - return resp, err - } + // Log the 403 error details for debugging + log.Warnf("kiro: received 403 error (attempt %d/%d), body: %s", attempt+1, maxRetries+1, summarizeErrorBody(httpResp.Header.Get("Content-Type"), respBody)) - defer func() { - if errClose := httpResp.Body.Close(); errClose != nil { - log.Errorf("response body close error: %v", errClose) - } - }() + respBodyStr := string(respBody) - content, toolUses, usageInfo, stopReason, err := e.parseEventStream(httpResp.Body) - if err != nil { - recordAPIResponseError(ctx, e.cfg, err) - return resp, err - } - - // Fallback for usage if missing from upstream - if usageInfo.TotalTokens == 0 { - if enc, encErr := getTokenizer(req.Model); encErr == nil { - if inp, countErr := countOpenAIChatTokens(enc, opts.OriginalRequest); countErr == nil { - usageInfo.InputTokens = inp + // Check for SUSPENDED status - return immediately without retry + if strings.Contains(respBodyStr, "SUSPENDED") || 
strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") { + log.Errorf("kiro: account is suspended, cannot proceed") + return resp, statusErr{code: httpResp.StatusCode, msg: "account suspended: " + string(respBody)} } + + // Check if this looks like a token-related 403 (some APIs return 403 for expired tokens) + isTokenRelated := strings.Contains(respBodyStr, "token") || + strings.Contains(respBodyStr, "expired") || + strings.Contains(respBodyStr, "invalid") || + strings.Contains(respBodyStr, "unauthorized") + + if isTokenRelated && attempt < maxRetries { + log.Warnf("kiro: 403 appears token-related, attempting token refresh") + refreshedAuth, refreshErr := e.Refresh(ctx, auth) + if refreshErr != nil { + log.Errorf("kiro: token refresh failed: %v", refreshErr) + // Token refresh failed - return error immediately + return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} + } + if refreshedAuth != nil { + auth = refreshedAuth + accessToken, profileArn = kiroCredentials(auth) + kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + log.Infof("kiro: token refreshed for 403, retrying request") + continue + } + } + + // For non-token 403 or after max retries, return error immediately + // Do NOT switch endpoints for 403 errors + log.Warnf("kiro: 403 error, returning immediately (no endpoint switch)") + return resp, statusErr{code: httpResp.StatusCode, msg: string(respBody)} } - if len(content) > 0 { - // Use tiktoken for more accurate output token calculation + + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + b, _ := io.ReadAll(httpResp.Body) + appendAPIResponseChunk(ctx, e.cfg, b) + log.Debugf("kiro request error, status: %d, body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b)) + err = statusErr{code: httpResp.StatusCode, msg: string(b)} + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("response body close error: %v", 
errClose) + } + return resp, err + } + + defer func() { + if errClose := httpResp.Body.Close(); errClose != nil { + log.Errorf("response body close error: %v", errClose) + } + }() + + content, toolUses, usageInfo, stopReason, err := e.parseEventStream(httpResp.Body) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return resp, err + } + + // Fallback for usage if missing from upstream + if usageInfo.TotalTokens == 0 { if enc, encErr := getTokenizer(req.Model); encErr == nil { - if tokenCount, countErr := enc.Count(content); countErr == nil { - usageInfo.OutputTokens = int64(tokenCount) + if inp, countErr := countOpenAIChatTokens(enc, opts.OriginalRequest); countErr == nil { + usageInfo.InputTokens = inp } } - // Fallback to character count estimation if tiktoken fails - if usageInfo.OutputTokens == 0 { - usageInfo.OutputTokens = int64(len(content) / 4) + if len(content) > 0 { + // Use tiktoken for more accurate output token calculation + if enc, encErr := getTokenizer(req.Model); encErr == nil { + if tokenCount, countErr := enc.Count(content); countErr == nil { + usageInfo.OutputTokens = int64(tokenCount) + } + } + // Fallback to character count estimation if tiktoken fails if usageInfo.OutputTokens == 0 { - usageInfo.OutputTokens = 1 + usageInfo.OutputTokens = int64(len(content) / 4) + if usageInfo.OutputTokens == 0 { + usageInfo.OutputTokens = 1 + } } } + usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens } - usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens - } - appendAPIResponseChunk(ctx, e.cfg, []byte(content)) - reporter.publish(ctx, usageInfo) + appendAPIResponseChunk(ctx, e.cfg, []byte(content)) + reporter.publish(ctx, usageInfo) - // Build response in Claude format for Kiro translator - // stopReason is extracted from upstream response by parseEventStream - kiroResponse := e.buildClaudeResponse(content, toolUses, req.Model, usageInfo, stopReason) - out := sdktranslator.TranslateNonStream(ctx, to, from, 
req.Model, bytes.Clone(opts.OriginalRequest), body, kiroResponse, nil) - resp = cliproxyexecutor.Response{Payload: []byte(out)} - return resp, nil + // Build response in Claude format for Kiro translator + // stopReason is extracted from upstream response by parseEventStream + kiroResponse := kiroclaude.BuildClaudeResponse(content, toolUses, req.Model, usageInfo, stopReason) + out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, kiroResponse, nil) + resp = cliproxyexecutor.Response{Payload: []byte(out)} + return resp, nil } // Inner retry loop exhausted for this endpoint, try next endpoint // Note: This code is unreachable because all paths in the inner loop @@ -559,6 +474,9 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. } // All endpoints exhausted + if last429Err != nil { + return resp, last429Err + } return resp, fmt.Errorf("kiro: all endpoints exhausted") } @@ -569,14 +487,6 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut if accessToken == "" { return nil, fmt.Errorf("kiro: access token not found in auth") } - if profileArn == "" { - // Only warn if not using builder-id auth (which doesn't need profileArn) - if auth == nil || auth.Metadata == nil { - log.Debugf("kiro: profile ARN not found in auth (may be normal for builder-id)") - } else if authMethod, ok := auth.Metadata["auth_method"].(string); !ok || authMethod != "builder-id" { - log.Warnf("kiro: profile ARN not found in auth, API calls may fail") - } - } reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) @@ -599,30 +509,14 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) kiroModelID := e.mapModelToKiro(req.Model) - - // Check if this is an agentic model variant - isAgentic := 
strings.HasSuffix(req.Model, "-agentic") - - // Check if this is a chat-only model variant (no tool calling) - isChatOnly := strings.HasSuffix(req.Model, "-chat") - - // Determine initial origin - always use AI_EDITOR to match AIClient-2-API behavior - // AIClient-2-API uses AI_EDITOR for all models, which is the Kiro IDE quota - currentOrigin := "AI_EDITOR" - - // Determine if profileArn should be included based on auth method - // profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO) - effectiveProfileArn := profileArn - if auth != nil && auth.Metadata != nil { - if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" { - effectiveProfileArn = "" // Don't include profileArn for builder-id auth - } - } - - kiroPayload := e.buildKiroPayload(body, kiroModelID, effectiveProfileArn, currentOrigin, isAgentic, isChatOnly) + + // Determine agentic mode and effective profile ARN using helper functions + isAgentic, isChatOnly := determineAgenticMode(req.Model) + effectiveProfileArn := getEffectiveProfileArnWithWarning(auth, profileArn) // Execute stream with retry on 401/403 and 429 (quota exhausted) - return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, kiroPayload, body, from, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly) + // Note: currentOrigin and kiroPayload are built inside executeStreamWithRetry for each endpoint + return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, nil, body, from, reporter, "", kiroModelID, isAgentic, isChatOnly) } // executeStreamWithRetry performs the streaming HTTP request with automatic retry on auth errors. 
@@ -633,233 +527,238 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, accessToken, profileArn string, kiroPayload, body []byte, from sdktranslator.Format, reporter *usageReporter, currentOrigin, kiroModelID string, isAgentic, isChatOnly bool) (<-chan cliproxyexecutor.StreamChunk, error) { maxRetries := 2 // Allow retries for token refresh + endpoint fallback endpointConfigs := getKiroEndpointConfigs(auth) + var last429Err error for endpointIdx := 0; endpointIdx < len(endpointConfigs); endpointIdx++ { endpointConfig := endpointConfigs[endpointIdx] url := endpointConfig.URL // Use this endpoint's compatible Origin (critical for avoiding 403 errors) currentOrigin = endpointConfig.Origin - + // Rebuild payload with the correct origin for this endpoint // Each endpoint requires its matching Origin value in the request body - kiroPayload = e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) - + kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + log.Debugf("kiro: stream trying endpoint %d/%d: %s (Name: %s, Origin: %s)", endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin) - for attempt := 0; attempt <= maxRetries; attempt++ { - httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(kiroPayload)) - if err != nil { - return nil, err - } - - httpReq.Header.Set("Content-Type", kiroContentType) - httpReq.Header.Set("Authorization", "Bearer "+accessToken) - httpReq.Header.Set("Accept", kiroAcceptStream) - // Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors) - httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget) - httpReq.Header.Set("User-Agent", kiroUserAgent) - httpReq.Header.Set("X-Amz-User-Agent", 
kiroFullUserAgent) - httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3") - httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String()) - - var attrs map[string]string - if auth != nil { - attrs = auth.Attributes - } - util.ApplyCustomHeadersFromAttrs(httpReq, attrs) - - var authID, authLabel, authType, authValue string - if auth != nil { - authID = auth.ID - authLabel = auth.Label - authType, authValue = auth.AccountInfo() - } - recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ - URL: url, - Method: http.MethodPost, - Headers: httpReq.Header.Clone(), - Body: kiroPayload, - Provider: e.Identifier(), - AuthID: authID, - AuthLabel: authLabel, - AuthType: authType, - AuthValue: authValue, - }) - - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) - httpResp, err := httpClient.Do(httpReq) - if err != nil { - recordAPIResponseError(ctx, e.cfg, err) - return nil, err - } - recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) - - // Handle 429 errors (quota exhausted) - try next endpoint - // Each endpoint has its own quota pool, so we can try different endpoints - if httpResp.StatusCode == 429 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) - - log.Warnf("kiro: stream %s endpoint quota exhausted (429), will try next endpoint", endpointConfig.Name) - - // Break inner retry loop to try next endpoint (which has different quota) - break - } - - // Handle 5xx server errors with exponential backoff retry - if httpResp.StatusCode >= 500 && httpResp.StatusCode < 600 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) - - if attempt < maxRetries { - // Exponential backoff: 1s, 2s, 4s... 
(max 30s) - backoff := time.Duration(1< 30*time.Second { - backoff = 30 * time.Second - } - log.Warnf("kiro: stream server error %d, retrying in %v (attempt %d/%d)", httpResp.StatusCode, backoff, attempt+1, maxRetries) - time.Sleep(backoff) - continue + for attempt := 0; attempt <= maxRetries; attempt++ { + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(kiroPayload)) + if err != nil { + return nil, err } - log.Errorf("kiro: stream server error %d after %d retries", httpResp.StatusCode, maxRetries) - return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } - // Handle 400 errors - Credential/Validation issues - // Do NOT switch endpoints - return error immediately - if httpResp.StatusCode == 400 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) + httpReq.Header.Set("Content-Type", kiroContentType) + httpReq.Header.Set("Authorization", "Bearer "+accessToken) + httpReq.Header.Set("Accept", kiroAcceptStream) + // Use endpoint-specific X-Amz-Target (critical for avoiding 403 errors) + httpReq.Header.Set("X-Amz-Target", endpointConfig.AmzTarget) + httpReq.Header.Set("User-Agent", kiroUserAgent) + httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent) + httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3") + httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String()) - log.Warnf("kiro: received 400 error (attempt %d/%d), body: %s", attempt+1, maxRetries+1, summarizeErrorBody(httpResp.Header.Get("Content-Type"), respBody)) + var attrs map[string]string + if auth != nil { + attrs = auth.Attributes + } + util.ApplyCustomHeadersFromAttrs(httpReq, attrs) - // 400 errors indicate request validation issues - return immediately without retry - return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } + var authID, authLabel, authType, authValue string + if auth != nil { + authID = auth.ID + authLabel = auth.Label + authType, 
authValue = auth.AccountInfo() + } + recordAPIRequest(ctx, e.cfg, upstreamRequestLog{ + URL: url, + Method: http.MethodPost, + Headers: httpReq.Header.Clone(), + Body: kiroPayload, + Provider: e.Identifier(), + AuthID: authID, + AuthLabel: authLabel, + AuthType: authType, + AuthValue: authValue, + }) - // Handle 401 errors with token refresh and retry - // 401 = Unauthorized (token expired/invalid) - refresh token - if httpResp.StatusCode == 401 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpResp, err := httpClient.Do(httpReq) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return nil, err + } + recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone()) - if attempt < maxRetries { - log.Warnf("kiro: stream received 401 error, attempting token refresh and retry (attempt %d/%d)", attempt+1, maxRetries+1) + // Handle 429 errors (quota exhausted) - try next endpoint + // Each endpoint has its own quota pool, so we can try different endpoints + if httpResp.StatusCode == 429 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) - refreshedAuth, refreshErr := e.Refresh(ctx, auth) - if refreshErr != nil { - log.Errorf("kiro: token refresh failed: %v", refreshErr) - return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } + // Preserve last 429 so callers can correctly backoff when all endpoints are exhausted + last429Err = statusErr{code: httpResp.StatusCode, msg: string(respBody)} - if refreshedAuth != nil { - auth = refreshedAuth - accessToken, profileArn = kiroCredentials(auth) - // Rebuild payload with new profile ARN if changed - kiroPayload = e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) - log.Infof("kiro: token refreshed successfully, retrying stream request") + 
log.Warnf("kiro: stream %s endpoint quota exhausted (429), will try next endpoint, body: %s", + endpointConfig.Name, summarizeErrorBody(httpResp.Header.Get("Content-Type"), respBody)) + + // Break inner retry loop to try next endpoint (which has different quota) + break + } + + // Handle 5xx server errors with exponential backoff retry + if httpResp.StatusCode >= 500 && httpResp.StatusCode < 600 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) + + if attempt < maxRetries { + // Exponential backoff: 1s, 2s, 4s... (max 30s) + backoff := time.Duration(1< 30*time.Second { + backoff = 30 * time.Second + } + log.Warnf("kiro: stream server error %d, retrying in %v (attempt %d/%d)", httpResp.StatusCode, backoff, attempt+1, maxRetries) + time.Sleep(backoff) continue } + log.Errorf("kiro: stream server error %d after %d retries", httpResp.StatusCode, maxRetries) + return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} } - log.Warnf("kiro stream error, status: 401, body: %s", string(respBody)) - return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } + // Handle 400 errors - Credential/Validation issues + // Do NOT switch endpoints - return error immediately + if httpResp.StatusCode == 400 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) - // Handle 402 errors - Monthly Limit Reached - if httpResp.StatusCode == 402 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) + log.Warnf("kiro: received 400 error (attempt %d/%d), body: %s", attempt+1, maxRetries+1, summarizeErrorBody(httpResp.Header.Get("Content-Type"), respBody)) - log.Warnf("kiro: stream received 402 (monthly limit). 
Upstream body: %s", string(respBody)) - - // Return upstream error body directly - return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } - - // Handle 403 errors - Access Denied / Token Expired - // Do NOT switch endpoints for 403 errors - if httpResp.StatusCode == 403 { - respBody, _ := io.ReadAll(httpResp.Body) - _ = httpResp.Body.Close() - appendAPIResponseChunk(ctx, e.cfg, respBody) - - // Log the 403 error details for debugging - log.Warnf("kiro: stream received 403 error (attempt %d/%d), body: %s", attempt+1, maxRetries+1, string(respBody)) - - respBodyStr := string(respBody) - - // Check for SUSPENDED status - return immediately without retry - if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") { - log.Errorf("kiro: account is suspended, cannot proceed") - return nil, statusErr{code: httpResp.StatusCode, msg: "account suspended: " + string(respBody)} + // 400 errors indicate request validation issues - return immediately without retry + return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} } - // Check if this looks like a token-related 403 (some APIs return 403 for expired tokens) - isTokenRelated := strings.Contains(respBodyStr, "token") || - strings.Contains(respBodyStr, "expired") || - strings.Contains(respBodyStr, "invalid") || - strings.Contains(respBodyStr, "unauthorized") + // Handle 401 errors with token refresh and retry + // 401 = Unauthorized (token expired/invalid) - refresh token + if httpResp.StatusCode == 401 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) - if isTokenRelated && attempt < maxRetries { - log.Warnf("kiro: 403 appears token-related, attempting token refresh") - refreshedAuth, refreshErr := e.Refresh(ctx, auth) - if refreshErr != nil { - log.Errorf("kiro: token refresh failed: %v", refreshErr) - // Token refresh failed - return error immediately - return nil, 
statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } - if refreshedAuth != nil { - auth = refreshedAuth - accessToken, profileArn = kiroCredentials(auth) - kiroPayload = e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) - log.Infof("kiro: token refreshed for 403, retrying stream request") - continue + if attempt < maxRetries { + log.Warnf("kiro: stream received 401 error, attempting token refresh and retry (attempt %d/%d)", attempt+1, maxRetries+1) + + refreshedAuth, refreshErr := e.Refresh(ctx, auth) + if refreshErr != nil { + log.Errorf("kiro: token refresh failed: %v", refreshErr) + return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} + } + + if refreshedAuth != nil { + auth = refreshedAuth + accessToken, profileArn = kiroCredentials(auth) + // Rebuild payload with new profile ARN if changed + kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + log.Infof("kiro: token refreshed successfully, retrying stream request") + continue + } } + + log.Warnf("kiro stream error, status: 401, body: %s", string(respBody)) + return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} } - // For non-token 403 or after max retries, return error immediately + // Handle 402 errors - Monthly Limit Reached + if httpResp.StatusCode == 402 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) + + log.Warnf("kiro: stream received 402 (monthly limit). 
Upstream body: %s", string(respBody)) + + // Return upstream error body directly + return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} + } + + // Handle 403 errors - Access Denied / Token Expired // Do NOT switch endpoints for 403 errors - log.Warnf("kiro: 403 error, returning immediately (no endpoint switch)") - return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} - } + if httpResp.StatusCode == 403 { + respBody, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + appendAPIResponseChunk(ctx, e.cfg, respBody) - if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { - b, _ := io.ReadAll(httpResp.Body) - appendAPIResponseChunk(ctx, e.cfg, b) - log.Debugf("kiro stream error, status: %d, body: %s", httpResp.StatusCode, string(b)) - if errClose := httpResp.Body.Close(); errClose != nil { - log.Errorf("response body close error: %v", errClose) - } - return nil, statusErr{code: httpResp.StatusCode, msg: string(b)} - } + // Log the 403 error details for debugging + log.Warnf("kiro: stream received 403 error (attempt %d/%d), body: %s", attempt+1, maxRetries+1, string(respBody)) - out := make(chan cliproxyexecutor.StreamChunk) + respBodyStr := string(respBody) - go func(resp *http.Response) { - defer close(out) - defer func() { - if r := recover(); r != nil { - log.Errorf("kiro: panic in stream handler: %v", r) - out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("internal error: %v", r)} + // Check for SUSPENDED status - return immediately without retry + if strings.Contains(respBodyStr, "SUSPENDED") || strings.Contains(respBodyStr, "TEMPORARILY_SUSPENDED") { + log.Errorf("kiro: account is suspended, cannot proceed") + return nil, statusErr{code: httpResp.StatusCode, msg: "account suspended: " + string(respBody)} } - }() - defer func() { - if errClose := resp.Body.Close(); errClose != nil { + + // Check if this looks like a token-related 403 (some APIs return 403 for expired tokens) + isTokenRelated := 
strings.Contains(respBodyStr, "token") || + strings.Contains(respBodyStr, "expired") || + strings.Contains(respBodyStr, "invalid") || + strings.Contains(respBodyStr, "unauthorized") + + if isTokenRelated && attempt < maxRetries { + log.Warnf("kiro: 403 appears token-related, attempting token refresh") + refreshedAuth, refreshErr := e.Refresh(ctx, auth) + if refreshErr != nil { + log.Errorf("kiro: token refresh failed: %v", refreshErr) + // Token refresh failed - return error immediately + return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} + } + if refreshedAuth != nil { + auth = refreshedAuth + accessToken, profileArn = kiroCredentials(auth) + kiroPayload = kiroclaude.BuildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly) + log.Infof("kiro: token refreshed for 403, retrying stream request") + continue + } + } + + // For non-token 403 or after max retries, return error immediately + // Do NOT switch endpoints for 403 errors + log.Warnf("kiro: 403 error, returning immediately (no endpoint switch)") + return nil, statusErr{code: httpResp.StatusCode, msg: string(respBody)} + } + + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + b, _ := io.ReadAll(httpResp.Body) + appendAPIResponseChunk(ctx, e.cfg, b) + log.Debugf("kiro stream error, status: %d, body: %s", httpResp.StatusCode, string(b)) + if errClose := httpResp.Body.Close(); errClose != nil { log.Errorf("response body close error: %v", errClose) } - }() + return nil, statusErr{code: httpResp.StatusCode, msg: string(b)} + } - e.streamToChannel(ctx, resp.Body, out, from, req.Model, opts.OriginalRequest, body, reporter) - }(httpResp) + out := make(chan cliproxyexecutor.StreamChunk) - return out, nil + go func(resp *http.Response) { + defer close(out) + defer func() { + if r := recover(); r != nil { + log.Errorf("kiro: panic in stream handler: %v", r) + out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("internal error: %v", r)} + } + }() + defer func() { 
+ if errClose := resp.Body.Close(); errClose != nil { + log.Errorf("response body close error: %v", errClose) + } + }() + + e.streamToChannel(ctx, resp.Body, out, from, req.Model, opts.OriginalRequest, body, reporter) + }(httpResp) + + return out, nil } // Inner retry loop exhausted for this endpoint, try next endpoint // Note: This code is unreachable because all paths in the inner loop @@ -867,16 +766,18 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox } // All endpoints exhausted + if last429Err != nil { + return nil, last429Err + } return nil, fmt.Errorf("kiro: stream all endpoints exhausted") } - // kiroCredentials extracts access token and profile ARN from auth. func kiroCredentials(auth *cliproxyauth.Auth) (accessToken, profileArn string) { if auth == nil { return "", "" } - + // Try Metadata first (wrapper format) if auth.Metadata != nil { if token, ok := auth.Metadata["access_token"].(string); ok { @@ -886,13 +787,13 @@ func kiroCredentials(auth *cliproxyauth.Auth) (accessToken, profileArn string) { profileArn = arn } } - + // Try Attributes if accessToken == "" && auth.Attributes != nil { accessToken = auth.Attributes["access_token"] profileArn = auth.Attributes["profile_arn"] } - + // Try direct fields from flat JSON format (new AWS Builder ID format) if accessToken == "" && auth.Metadata != nil { if token, ok := auth.Metadata["accessToken"].(string); ok { @@ -902,10 +803,46 @@ func kiroCredentials(auth *cliproxyauth.Auth) (accessToken, profileArn string) { profileArn = arn } } - + return accessToken, profileArn } +// determineAgenticMode determines if the model is an agentic or chat-only variant. +// Returns (isAgentic, isChatOnly) based on model name suffixes. 
+func determineAgenticMode(model string) (isAgentic, isChatOnly bool) { + isAgentic = strings.HasSuffix(model, "-agentic") + isChatOnly = strings.HasSuffix(model, "-chat") + return isAgentic, isChatOnly +} + +// getEffectiveProfileArn determines if profileArn should be included based on auth method. +// profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO). +func getEffectiveProfileArn(auth *cliproxyauth.Auth, profileArn string) string { + if auth != nil && auth.Metadata != nil { + if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" { + return "" // Don't include profileArn for builder-id auth + } + } + return profileArn +} + +// getEffectiveProfileArnWithWarning determines if profileArn should be included based on auth method, +// and logs a warning if profileArn is missing for non-builder-id auth. +// This consolidates the auth_method check that was previously done separately. +func getEffectiveProfileArnWithWarning(auth *cliproxyauth.Auth, profileArn string) string { + if auth != nil && auth.Metadata != nil { + if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" { + // builder-id auth doesn't need profileArn + return "" + } + } + // For non-builder-id auth (social auth), profileArn is required + if profileArn == "" { + log.Warnf("kiro: profile ARN not found in auth, API calls may fail") + } + return profileArn +} + // mapModelToKiro maps external model names to Kiro model IDs. // Supports both Kiro and Amazon Q prefixes since they use the same API. // Agentic variants (-agentic suffix) map to the same backend model IDs. 
@@ -939,28 +876,28 @@ func (e *KiroExecutor) mapModelToKiro(model string) string { "claude-sonnet-4-20250514": "claude-sonnet-4", "auto": "auto", // Agentic variants (same backend model IDs, but with special system prompt) - "claude-opus-4.5-agentic": "claude-opus-4.5", - "claude-sonnet-4.5-agentic": "claude-sonnet-4.5", - "claude-sonnet-4-agentic": "claude-sonnet-4", - "claude-haiku-4.5-agentic": "claude-haiku-4.5", - "kiro-claude-opus-4-5-agentic": "claude-opus-4.5", - "kiro-claude-sonnet-4-5-agentic": "claude-sonnet-4.5", - "kiro-claude-sonnet-4-agentic": "claude-sonnet-4", - "kiro-claude-haiku-4-5-agentic": "claude-haiku-4.5", + "claude-opus-4.5-agentic": "claude-opus-4.5", + "claude-sonnet-4.5-agentic": "claude-sonnet-4.5", + "claude-sonnet-4-agentic": "claude-sonnet-4", + "claude-haiku-4.5-agentic": "claude-haiku-4.5", + "kiro-claude-opus-4-5-agentic": "claude-opus-4.5", + "kiro-claude-sonnet-4-5-agentic": "claude-sonnet-4.5", + "kiro-claude-sonnet-4-agentic": "claude-sonnet-4", + "kiro-claude-haiku-4-5-agentic": "claude-haiku-4.5", } if kiroID, ok := modelMap[model]; ok { return kiroID } - + // Smart fallback: try to infer model type from name patterns modelLower := strings.ToLower(model) - + // Check for Haiku variants if strings.Contains(modelLower, "haiku") { log.Debugf("kiro: unknown Haiku model '%s', mapping to claude-haiku-4.5", model) return "claude-haiku-4.5" } - + // Check for Sonnet variants if strings.Contains(modelLower, "sonnet") { // Check for specific version patterns @@ -976,13 +913,13 @@ func (e *KiroExecutor) mapModelToKiro(model string) string { log.Debugf("kiro: unknown Sonnet model '%s', mapping to claude-sonnet-4", model) return "claude-sonnet-4" } - + // Check for Opus variants if strings.Contains(modelLower, "opus") { log.Debugf("kiro: unknown Opus model '%s', mapping to claude-opus-4.5", model) return "claude-opus-4.5" } - + // Final fallback to Sonnet 4.5 (most commonly used model) log.Warnf("kiro: unknown model '%s', falling back 
to claude-sonnet-4.5", model) return "claude-sonnet-4.5" @@ -1008,582 +945,23 @@ type eventStreamMessage struct { Payload []byte // JSON payload of the message } -// Kiro API request structs - field order determines JSON key order - -type kiroPayload struct { - ConversationState kiroConversationState `json:"conversationState"` - ProfileArn string `json:"profileArn,omitempty"` - InferenceConfig *kiroInferenceConfig `json:"inferenceConfig,omitempty"` -} - -// kiroInferenceConfig contains inference parameters for the Kiro API. -// NOTE: This is an experimental addition - Kiro/Amazon Q API may not support these parameters. -// If the API ignores or rejects these fields, response length is controlled internally by the model. -type kiroInferenceConfig struct { - MaxTokens int `json:"maxTokens,omitempty"` // Maximum output tokens (may be ignored by API) - Temperature float64 `json:"temperature,omitempty"` // Sampling temperature (may be ignored by API) -} - -type kiroConversationState struct { - ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL" - must be first field - ConversationID string `json:"conversationId"` - CurrentMessage kiroCurrentMessage `json:"currentMessage"` - History []kiroHistoryMessage `json:"history,omitempty"` -} - -type kiroCurrentMessage struct { - UserInputMessage kiroUserInputMessage `json:"userInputMessage"` -} - -type kiroHistoryMessage struct { - UserInputMessage *kiroUserInputMessage `json:"userInputMessage,omitempty"` - AssistantResponseMessage *kiroAssistantResponseMessage `json:"assistantResponseMessage,omitempty"` -} - -// kiroImage represents an image in Kiro API format -type kiroImage struct { - Format string `json:"format"` - Source kiroImageSource `json:"source"` -} - -// kiroImageSource contains the image data -type kiroImageSource struct { - Bytes string `json:"bytes"` // base64 encoded image data -} - -type kiroUserInputMessage struct { - Content string `json:"content"` - ModelID string `json:"modelId"` - Origin 
string `json:"origin"` - Images []kiroImage `json:"images,omitempty"` - UserInputMessageContext *kiroUserInputMessageContext `json:"userInputMessageContext,omitempty"` -} - -type kiroUserInputMessageContext struct { - ToolResults []kiroToolResult `json:"toolResults,omitempty"` - Tools []kiroToolWrapper `json:"tools,omitempty"` -} - -type kiroToolResult struct { - Content []kiroTextContent `json:"content"` - Status string `json:"status"` - ToolUseID string `json:"toolUseId"` -} - -type kiroTextContent struct { - Text string `json:"text"` -} - -type kiroToolWrapper struct { - ToolSpecification kiroToolSpecification `json:"toolSpecification"` -} - -type kiroToolSpecification struct { - Name string `json:"name"` - Description string `json:"description"` - InputSchema kiroInputSchema `json:"inputSchema"` -} - -type kiroInputSchema struct { - JSON interface{} `json:"json"` -} - -type kiroAssistantResponseMessage struct { - Content string `json:"content"` - ToolUses []kiroToolUse `json:"toolUses,omitempty"` -} - -type kiroToolUse struct { - ToolUseID string `json:"toolUseId"` - Name string `json:"name"` - Input map[string]interface{} `json:"input"` -} - -// buildKiroPayload constructs the Kiro API request payload. -// Supports tool calling - tools are passed via userInputMessageContext. -// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE. -// isAgentic parameter enables chunked write optimization prompt for -agentic model variants. -// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode). -// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint. -// -// max_tokens support: Kiro/Amazon Q API may not officially support max_tokens parameter. -// We attempt to pass it via inferenceConfig.maxTokens, but the API may ignore it. -// Response truncation can be detected via stop_reason == "max_tokens" in the response. 
-func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte { - // Extract max_tokens for potential use in inferenceConfig - var maxTokens int64 - if mt := gjson.GetBytes(claudeBody, "max_tokens"); mt.Exists() { - maxTokens = mt.Int() - } - - // Extract temperature if specified - var temperature float64 - var hasTemperature bool - if temp := gjson.GetBytes(claudeBody, "temperature"); temp.Exists() { - temperature = temp.Float() - hasTemperature = true - } - - // Normalize origin value for Kiro API compatibility - // Kiro API only accepts "CLI" or "AI_EDITOR" as valid origin values - switch origin { - case "KIRO_CLI": - origin = "CLI" - case "KIRO_AI_EDITOR": - origin = "AI_EDITOR" - case "AMAZON_Q": - origin = "CLI" - case "KIRO_IDE": - origin = "AI_EDITOR" - // Add any other non-standard origin values that need normalization - default: - // Keep the original value if it's already standard - // Valid values: "CLI", "AI_EDITOR" - } - log.Debugf("kiro: normalized origin value: %s", origin) - - messages := gjson.GetBytes(claudeBody, "messages") - - // For chat-only mode, don't include tools - var tools gjson.Result - if !isChatOnly { - tools = gjson.GetBytes(claudeBody, "tools") - } - - // Extract system prompt - can be string or array of content blocks - systemField := gjson.GetBytes(claudeBody, "system") - var systemPrompt string - if systemField.IsArray() { - // System is array of content blocks, extract text - var sb strings.Builder - for _, block := range systemField.Array() { - if block.Get("type").String() == "text" { - sb.WriteString(block.Get("text").String()) - } else if block.Type == gjson.String { - sb.WriteString(block.String()) - } - } - systemPrompt = sb.String() - } else { - systemPrompt = systemField.String() - } - - // Check for thinking parameter in Claude API request - // Claude API format: {"thinking": {"type": "enabled", "budget_tokens": 16000}} - // When thinking is enabled, 
inject dynamic thinkingHint based on budget_tokens - // This allows reasoning_effort (low/medium/high) to control actual thinking length - thinkingEnabled := false - var budgetTokens int64 = 16000 // Default value (same as OpenAI reasoning_effort "medium") - thinkingField := gjson.GetBytes(claudeBody, "thinking") - if thinkingField.Exists() { - // Check if thinking.type is "enabled" - thinkingType := thinkingField.Get("type").String() - if thinkingType == "enabled" { - thinkingEnabled = true - // Read budget_tokens if specified - this value comes from: - // - Claude API: thinking.budget_tokens directly - // - OpenAI API: reasoning_effort -> budget_tokens (low:4000, medium:16000, high:32000) - if bt := thinkingField.Get("budget_tokens"); bt.Exists() { - budgetTokens = bt.Int() - // If budget_tokens <= 0, disable thinking explicitly - // This allows users to disable thinking by setting budget_tokens to 0 - if budgetTokens <= 0 { - thinkingEnabled = false - log.Debugf("kiro: thinking mode disabled via budget_tokens <= 0") - } - } - if thinkingEnabled { - log.Debugf("kiro: thinking mode enabled via Claude API parameter, budget_tokens: %d", budgetTokens) - } - } - } - - // Inject timestamp context for better temporal awareness - // Based on amq2api implementation - helps model understand current time context - timestamp := time.Now().Format("2006-01-02 15:04:05 MST") - timestampContext := fmt.Sprintf("[Context: Current time is %s]", timestamp) - if systemPrompt != "" { - systemPrompt = timestampContext + "\n\n" + systemPrompt - } else { - systemPrompt = timestampContext - } - log.Debugf("kiro: injected timestamp context: %s", timestamp) - - // Inject agentic optimization prompt for -agentic model variants - // This prevents AWS Kiro API timeouts during large file write operations - if isAgentic { - if systemPrompt != "" { - systemPrompt += "\n" - } - systemPrompt += kiroAgenticSystemPrompt - } - - // Inject thinking hint when thinking mode is enabled - // This tells the 
model to use tags in its response - // DYNAMICALLY set max_thinking_length based on budget_tokens from request - // This respects the reasoning_effort setting: low(4000), medium(16000), high(32000) - if thinkingEnabled { - if systemPrompt != "" { - systemPrompt += "\n" - } - // Build dynamic thinking hint with the actual budget_tokens value - dynamicThinkingHint := fmt.Sprintf("interleaved%d", budgetTokens) - systemPrompt += dynamicThinkingHint - log.Debugf("kiro: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens) - } - - // Convert Claude tools to Kiro format - var kiroTools []kiroToolWrapper - if tools.IsArray() { - for _, tool := range tools.Array() { - name := tool.Get("name").String() - description := tool.Get("description").String() - inputSchema := tool.Get("input_schema").Value() - - // Truncate long descriptions (Kiro API limit is in bytes) - // Truncate at valid UTF-8 boundary to avoid breaking multi-byte chars - // Add truncation notice to help model understand the description is incomplete - if len(description) > kiroMaxToolDescLen { - // Find a valid UTF-8 boundary before the limit - // Reserve space for truncation notice (about 30 bytes) - truncLen := kiroMaxToolDescLen - 30 - for truncLen > 0 && !utf8.RuneStart(description[truncLen]) { - truncLen-- - } - description = description[:truncLen] + "... 
(description truncated)" - } - - kiroTools = append(kiroTools, kiroToolWrapper{ - ToolSpecification: kiroToolSpecification{ - Name: name, - Description: description, - InputSchema: kiroInputSchema{JSON: inputSchema}, - }, - }) - } - } - - var history []kiroHistoryMessage - var currentUserMsg *kiroUserInputMessage - var currentToolResults []kiroToolResult - - // Merge adjacent messages with the same role before processing - // This reduces API call complexity and improves compatibility - messagesArray := mergeAdjacentMessages(messages.Array()) - for i, msg := range messagesArray { - role := msg.Get("role").String() - isLastMessage := i == len(messagesArray)-1 - - if role == "user" { - userMsg, toolResults := e.buildUserMessageStruct(msg, modelID, origin) - if isLastMessage { - currentUserMsg = &userMsg - currentToolResults = toolResults - } else { - // CRITICAL: Kiro API requires content to be non-empty for history messages too - if strings.TrimSpace(userMsg.Content) == "" { - if len(toolResults) > 0 { - userMsg.Content = "Tool results provided." 
- } else { - userMsg.Content = "Continue" - } - } - // For history messages, embed tool results in context - if len(toolResults) > 0 { - userMsg.UserInputMessageContext = &kiroUserInputMessageContext{ - ToolResults: toolResults, - } - } - history = append(history, kiroHistoryMessage{ - UserInputMessage: &userMsg, - }) - } - } else if role == "assistant" { - assistantMsg := e.buildAssistantMessageStruct(msg) - // If this is the last message and it's an assistant message, - // we need to add it to history and create a "Continue" user message - // because Kiro API requires currentMessage to be userInputMessage type - if isLastMessage { - history = append(history, kiroHistoryMessage{ - AssistantResponseMessage: &assistantMsg, - }) - // Create a "Continue" user message as currentMessage - currentUserMsg = &kiroUserInputMessage{ - Content: "Continue", - ModelID: modelID, - Origin: origin, - } - } else { - history = append(history, kiroHistoryMessage{ - AssistantResponseMessage: &assistantMsg, - }) - } - } - } - - // Build content with system prompt - if currentUserMsg != nil { - var contentBuilder strings.Builder - - // Add system prompt if present - if systemPrompt != "" { - contentBuilder.WriteString("--- SYSTEM PROMPT ---\n") - contentBuilder.WriteString(systemPrompt) - contentBuilder.WriteString("\n--- END SYSTEM PROMPT ---\n\n") - } - - // Add the actual user message - contentBuilder.WriteString(currentUserMsg.Content) - finalContent := contentBuilder.String() - - // CRITICAL: Kiro API requires content to be non-empty, even when toolResults are present - // If content is empty or only whitespace, provide a default message - if strings.TrimSpace(finalContent) == "" { - if len(currentToolResults) > 0 { - finalContent = "Tool results provided." 
- } else { - finalContent = "Continue" - } - log.Debugf("kiro: content was empty, using default: %s", finalContent) - } - currentUserMsg.Content = finalContent - - // Deduplicate currentToolResults before adding to context - // Kiro API does not accept duplicate toolUseIds - if len(currentToolResults) > 0 { - seenIDs := make(map[string]bool) - uniqueToolResults := make([]kiroToolResult, 0, len(currentToolResults)) - for _, tr := range currentToolResults { - if !seenIDs[tr.ToolUseID] { - seenIDs[tr.ToolUseID] = true - uniqueToolResults = append(uniqueToolResults, tr) - } else { - log.Debugf("kiro: skipping duplicate toolResult in currentMessage: %s", tr.ToolUseID) - } - } - currentToolResults = uniqueToolResults - } - - // Build userInputMessageContext with tools and tool results - if len(kiroTools) > 0 || len(currentToolResults) > 0 { - currentUserMsg.UserInputMessageContext = &kiroUserInputMessageContext{ - Tools: kiroTools, - ToolResults: currentToolResults, - } - } - } - - // Build payload using structs (preserves key order) - var currentMessage kiroCurrentMessage - if currentUserMsg != nil { - currentMessage = kiroCurrentMessage{UserInputMessage: *currentUserMsg} - } else { - // Fallback when no user messages - still include system prompt if present - fallbackContent := "" - if systemPrompt != "" { - fallbackContent = "--- SYSTEM PROMPT ---\n" + systemPrompt + "\n--- END SYSTEM PROMPT ---\n" - } - currentMessage = kiroCurrentMessage{UserInputMessage: kiroUserInputMessage{ - Content: fallbackContent, - ModelID: modelID, - Origin: origin, - }} - } - - // Build inferenceConfig if we have any inference parameters - var inferenceConfig *kiroInferenceConfig - if maxTokens > 0 || hasTemperature { - inferenceConfig = &kiroInferenceConfig{} - if maxTokens > 0 { - inferenceConfig.MaxTokens = int(maxTokens) - } - if hasTemperature { - inferenceConfig.Temperature = temperature - } - } - - // Build payload with correct field order (matches struct definition) - payload := 
kiroPayload{ - ConversationState: kiroConversationState{ - ChatTriggerType: "MANUAL", // Required by Kiro API - must be first - ConversationID: uuid.New().String(), - CurrentMessage: currentMessage, - History: history, // Now always included (non-nil slice) - }, - ProfileArn: profileArn, - InferenceConfig: inferenceConfig, - } - - result, err := json.Marshal(payload) - if err != nil { - log.Debugf("kiro: failed to marshal payload: %v", err) - return nil - } - - return result -} - -// buildUserMessageStruct builds a user message and extracts tool results -// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE. -// IMPORTANT: Kiro API does not accept duplicate toolUseIds, so we deduplicate here. -func (e *KiroExecutor) buildUserMessageStruct(msg gjson.Result, modelID, origin string) (kiroUserInputMessage, []kiroToolResult) { - content := msg.Get("content") - var contentBuilder strings.Builder - var toolResults []kiroToolResult - var images []kiroImage - - // Track seen toolUseIds to deduplicate - Kiro API rejects duplicate toolUseIds - seenToolUseIDs := make(map[string]bool) - - if content.IsArray() { - for _, part := range content.Array() { - partType := part.Get("type").String() - switch partType { - case "text": - contentBuilder.WriteString(part.Get("text").String()) - case "image": - // Extract image data from Claude API format - mediaType := part.Get("source.media_type").String() - data := part.Get("source.data").String() - - // Extract format from media_type (e.g., "image/png" -> "png") - format := "" - if idx := strings.LastIndex(mediaType, "/"); idx != -1 { - format = mediaType[idx+1:] - } - - if format != "" && data != "" { - images = append(images, kiroImage{ - Format: format, - Source: kiroImageSource{ - Bytes: data, - }, - }) - } - case "tool_result": - // Extract tool result for API - toolUseID := part.Get("tool_use_id").String() - - // Skip duplicate toolUseIds - Kiro API does not accept duplicates - if 
seenToolUseIDs[toolUseID] { - log.Debugf("kiro: skipping duplicate tool_result with toolUseId: %s", toolUseID) - continue - } - seenToolUseIDs[toolUseID] = true - - isError := part.Get("is_error").Bool() - resultContent := part.Get("content") - - // Convert content to Kiro format: [{text: "..."}] - var textContents []kiroTextContent - if resultContent.IsArray() { - for _, item := range resultContent.Array() { - if item.Get("type").String() == "text" { - textContents = append(textContents, kiroTextContent{Text: item.Get("text").String()}) - } else if item.Type == gjson.String { - textContents = append(textContents, kiroTextContent{Text: item.String()}) - } - } - } else if resultContent.Type == gjson.String { - textContents = append(textContents, kiroTextContent{Text: resultContent.String()}) - } - - // If no content, add default message - if len(textContents) == 0 { - textContents = append(textContents, kiroTextContent{Text: "Tool use was cancelled by the user"}) - } - - status := "success" - if isError { - status = "error" - } - - toolResults = append(toolResults, kiroToolResult{ - ToolUseID: toolUseID, - Content: textContents, - Status: status, - }) - } - } - } else { - contentBuilder.WriteString(content.String()) - } - - userMsg := kiroUserInputMessage{ - Content: contentBuilder.String(), - ModelID: modelID, - Origin: origin, - } - - // Add images to message if present - if len(images) > 0 { - userMsg.Images = images - } - - return userMsg, toolResults -} - -// buildAssistantMessageStruct builds an assistant message with tool uses -func (e *KiroExecutor) buildAssistantMessageStruct(msg gjson.Result) kiroAssistantResponseMessage { - content := msg.Get("content") - var contentBuilder strings.Builder - var toolUses []kiroToolUse - - if content.IsArray() { - for _, part := range content.Array() { - partType := part.Get("type").String() - switch partType { - case "text": - contentBuilder.WriteString(part.Get("text").String()) - case "tool_use": - // Extract tool use 
for API - toolUseID := part.Get("id").String() - toolName := part.Get("name").String() - toolInput := part.Get("input") - - // Convert input to map - var inputMap map[string]interface{} - if toolInput.IsObject() { - inputMap = make(map[string]interface{}) - toolInput.ForEach(func(key, value gjson.Result) bool { - inputMap[key.String()] = value.Value() - return true - }) - } - - toolUses = append(toolUses, kiroToolUse{ - ToolUseID: toolUseID, - Name: toolName, - Input: inputMap, - }) - } - } - } else { - contentBuilder.WriteString(content.String()) - } - - return kiroAssistantResponseMessage{ - Content: contentBuilder.String(), - ToolUses: toolUses, - } -} - -// NOTE: Tool calling is now supported via userInputMessageContext.tools and toolResults +// NOTE: Request building functions moved to internal/translator/kiro/claude/kiro_claude_request.go +// The executor now uses kiroclaude.BuildKiroPayload() instead // parseEventStream parses AWS Event Stream binary format. // Extracts text content, tool uses, and stop_reason from the response. // Supports embedded [Called ...] tool calls and input buffering for toolUseEvent. 
// Returns: content, toolUses, usageInfo, stopReason, error -func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse, usage.Detail, string, error) { +func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroclaude.KiroToolUse, usage.Detail, string, error) { var content strings.Builder - var toolUses []kiroToolUse + var toolUses []kiroclaude.KiroToolUse var usageInfo usage.Detail var stopReason string // Extracted from upstream response reader := bufio.NewReader(body) // Tool use state tracking for input buffering and deduplication processedIDs := make(map[string]bool) - var currentToolUse *toolUseState + var currentToolUse *kiroclaude.ToolUseState for { msg, eventErr := e.readEventStreamMessage(reader) @@ -1635,11 +1013,11 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse, // Extract stop_reason from various event formats // Kiro/Amazon Q API may include stop_reason in different locations - if sr := getString(event, "stop_reason"); sr != "" { + if sr := kirocommon.GetString(event, "stop_reason"); sr != "" { stopReason = sr log.Debugf("kiro: parseEventStream found stop_reason (top-level): %s", stopReason) } - if sr := getString(event, "stopReason"); sr != "" { + if sr := kirocommon.GetString(event, "stopReason"); sr != "" { stopReason = sr log.Debugf("kiro: parseEventStream found stopReason (top-level): %s", stopReason) } @@ -1657,11 +1035,11 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse, content.WriteString(contentText) } // Extract stop_reason from assistantResponseEvent - if sr := getString(assistantResp, "stop_reason"); sr != "" { + if sr := kirocommon.GetString(assistantResp, "stop_reason"); sr != "" { stopReason = sr log.Debugf("kiro: parseEventStream found stop_reason in assistantResponseEvent: %s", stopReason) } - if sr := getString(assistantResp, "stopReason"); sr != "" { + if sr := kirocommon.GetString(assistantResp, "stopReason"); sr != "" { stopReason 
= sr log.Debugf("kiro: parseEventStream found stopReason in assistantResponseEvent: %s", stopReason) } @@ -1669,17 +1047,17 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse, if toolUsesRaw, ok := assistantResp["toolUses"].([]interface{}); ok { for _, tuRaw := range toolUsesRaw { if tu, ok := tuRaw.(map[string]interface{}); ok { - toolUseID := getString(tu, "toolUseId") + toolUseID := kirocommon.GetStringValue(tu, "toolUseId") // Check for duplicate if processedIDs[toolUseID] { log.Debugf("kiro: skipping duplicate tool use from assistantResponse: %s", toolUseID) continue } processedIDs[toolUseID] = true - - toolUse := kiroToolUse{ + + toolUse := kiroclaude.KiroToolUse{ ToolUseID: toolUseID, - Name: getString(tu, "name"), + Name: kirocommon.GetStringValue(tu, "name"), } if input, ok := tu["input"].(map[string]interface{}); ok { toolUse.Input = input @@ -1697,17 +1075,17 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse, if toolUsesRaw, ok := event["toolUses"].([]interface{}); ok { for _, tuRaw := range toolUsesRaw { if tu, ok := tuRaw.(map[string]interface{}); ok { - toolUseID := getString(tu, "toolUseId") + toolUseID := kirocommon.GetStringValue(tu, "toolUseId") // Check for duplicate if processedIDs[toolUseID] { log.Debugf("kiro: skipping duplicate direct tool use: %s", toolUseID) continue } processedIDs[toolUseID] = true - - toolUse := kiroToolUse{ + + toolUse := kiroclaude.KiroToolUse{ ToolUseID: toolUseID, - Name: getString(tu, "name"), + Name: kirocommon.GetStringValue(tu, "name"), } if input, ok := tu["input"].(map[string]interface{}); ok { toolUse.Input = input @@ -1719,7 +1097,7 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse, case "toolUseEvent": // Handle dedicated tool use events with input buffering - completedToolUses, newState := e.processToolUseEvent(event, currentToolUse, processedIDs) + completedToolUses, newState := 
kiroclaude.ProcessToolUseEvent(event, currentToolUse, processedIDs) currentToolUse = newState toolUses = append(toolUses, completedToolUses...) @@ -1733,11 +1111,11 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse, case "messageStopEvent", "message_stop": // Handle message stop events which may contain stop_reason - if sr := getString(event, "stop_reason"); sr != "" { + if sr := kirocommon.GetString(event, "stop_reason"); sr != "" { stopReason = sr log.Debugf("kiro: parseEventStream found stop_reason in messageStopEvent: %s", stopReason) } - if sr := getString(event, "stopReason"); sr != "" { + if sr := kirocommon.GetString(event, "stopReason"); sr != "" { stopReason = sr log.Debugf("kiro: parseEventStream found stopReason in messageStopEvent: %s", stopReason) } @@ -1756,11 +1134,11 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse, // Parse embedded tool calls from content (e.g., [Called tool_name with args: {...}]) contentStr := content.String() - cleanedContent, embeddedToolUses := e.parseEmbeddedToolCalls(contentStr, processedIDs) + cleanedContent, embeddedToolUses := kiroclaude.ParseEmbeddedToolCalls(contentStr, processedIDs) toolUses = append(toolUses, embeddedToolUses...) 
// Deduplicate all tool uses - toolUses = deduplicateToolUses(toolUses) + toolUses = kiroclaude.DeduplicateToolUses(toolUses) // Apply fallback logic for stop_reason if not provided by upstream // Priority: upstream stopReason > tool_use detection > end_turn default @@ -1876,13 +1254,59 @@ func (e *KiroExecutor) readEventStreamMessage(reader *bufio.Reader) (*eventStrea }, nil } +func skipEventStreamHeaderValue(headers []byte, offset int, valueType byte) (int, bool) { + switch valueType { + case 0, 1: // bool true / bool false + return offset, true + case 2: // byte + if offset+1 > len(headers) { + return offset, false + } + return offset + 1, true + case 3: // short + if offset+2 > len(headers) { + return offset, false + } + return offset + 2, true + case 4: // int + if offset+4 > len(headers) { + return offset, false + } + return offset + 4, true + case 5: // long + if offset+8 > len(headers) { + return offset, false + } + return offset + 8, true + case 6: // byte array (2-byte length + data) + if offset+2 > len(headers) { + return offset, false + } + valueLen := int(binary.BigEndian.Uint16(headers[offset : offset+2])) + offset += 2 + if offset+valueLen > len(headers) { + return offset, false + } + return offset + valueLen, true + case 8: // timestamp + if offset+8 > len(headers) { + return offset, false + } + return offset + 8, true + case 9: // uuid + if offset+16 > len(headers) { + return offset, false + } + return offset + 16, true + default: + return offset, false + } +} + // extractEventTypeFromBytes extracts the event type from raw header bytes (without prelude CRC prefix) func (e *KiroExecutor) extractEventTypeFromBytes(headers []byte) string { offset := 0 for offset < len(headers) { - if offset >= len(headers) { - break - } nameLen := int(headers[offset]) offset++ if offset+nameLen > len(headers) { @@ -1912,240 +1336,21 @@ func (e *KiroExecutor) extractEventTypeFromBytes(headers []byte) string { if name == ":event-type" { return value } - } else { - // 
Skip other types + continue + } + + nextOffset, ok := skipEventStreamHeaderValue(headers, offset, valueType) + if !ok { break } + offset = nextOffset } return "" } -// extractEventType extracts the event type from AWS Event Stream headers -// Note: This is the legacy version that expects headerBytes to include prelude CRC prefix -func (e *KiroExecutor) extractEventType(headerBytes []byte) string { - // Skip prelude CRC (4 bytes) - if len(headerBytes) < 4 { - return "" - } - headers := headerBytes[4:] - - offset := 0 - for offset < len(headers) { - if offset >= len(headers) { - break - } - nameLen := int(headers[offset]) - offset++ - if offset+nameLen > len(headers) { - break - } - name := string(headers[offset : offset+nameLen]) - offset += nameLen - - if offset >= len(headers) { - break - } - valueType := headers[offset] - offset++ - - if valueType == 7 { // String type - if offset+2 > len(headers) { - break - } - valueLen := int(binary.BigEndian.Uint16(headers[offset : offset+2])) - offset += 2 - if offset+valueLen > len(headers) { - break - } - value := string(headers[offset : offset+valueLen]) - offset += valueLen - - if name == ":event-type" { - return value - } - } else { - // Skip other types - break - } - } - return "" -} - -// getString safely extracts a string from a map -func getString(m map[string]interface{}, key string) string { - if v, ok := m[key].(string); ok { - return v - } - return "" -} - -// buildClaudeResponse constructs a Claude-compatible response. -// Supports tool_use blocks when tools are present in the response. -// Supports thinking blocks - parses tags and converts to Claude thinking content blocks. -// stopReason is passed from upstream; fallback logic applied if empty. 
-func (e *KiroExecutor) buildClaudeResponse(content string, toolUses []kiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte { - var contentBlocks []map[string]interface{} - - // Extract thinking blocks and text from content - // This handles ... tags from Kiro's response - if content != "" { - blocks := e.extractThinkingFromContent(content) - contentBlocks = append(contentBlocks, blocks...) - - // DIAGNOSTIC: Log if thinking blocks were extracted - for _, block := range blocks { - if block["type"] == "thinking" { - thinkingContent := block["thinking"].(string) - log.Infof("kiro: buildClaudeResponse extracted thinking block (len: %d)", len(thinkingContent)) - } - } - } - - // Add tool_use blocks - for _, toolUse := range toolUses { - contentBlocks = append(contentBlocks, map[string]interface{}{ - "type": "tool_use", - "id": toolUse.ToolUseID, - "name": toolUse.Name, - "input": toolUse.Input, - }) - } - - // Ensure at least one content block (Claude API requires non-empty content) - if len(contentBlocks) == 0 { - contentBlocks = append(contentBlocks, map[string]interface{}{ - "type": "text", - "text": "", - }) - } - - // Use upstream stopReason; apply fallback logic if not provided - if stopReason == "" { - stopReason = "end_turn" - if len(toolUses) > 0 { - stopReason = "tool_use" - } - log.Debugf("kiro: buildClaudeResponse using fallback stop_reason: %s", stopReason) - } - - // Log warning if response was truncated due to max_tokens - if stopReason == "max_tokens" { - log.Warnf("kiro: response truncated due to max_tokens limit (buildClaudeResponse)") - } - - response := map[string]interface{}{ - "id": "msg_" + uuid.New().String()[:24], - "type": "message", - "role": "assistant", - "model": model, - "content": contentBlocks, - "stop_reason": stopReason, - "usage": map[string]interface{}{ - "input_tokens": usageInfo.InputTokens, - "output_tokens": usageInfo.OutputTokens, - }, - } - result, _ := json.Marshal(response) - return result -} - -// 
extractThinkingFromContent parses content to extract thinking blocks and text. -// Returns a list of content blocks in the order they appear in the content. -// Handles interleaved thinking and text blocks correctly. -// Based on the streaming implementation's thinking tag handling. -func (e *KiroExecutor) extractThinkingFromContent(content string) []map[string]interface{} { - var blocks []map[string]interface{} - - if content == "" { - return blocks - } - - // Check if content contains thinking tags at all - if !strings.Contains(content, thinkingStartTag) { - // No thinking tags, return as plain text - return []map[string]interface{}{ - { - "type": "text", - "text": content, - }, - } - } - - log.Debugf("kiro: extractThinkingFromContent - found thinking tags in content (len: %d)", len(content)) - - remaining := content - - for len(remaining) > 0 { - // Look for tag - startIdx := strings.Index(remaining, thinkingStartTag) - - if startIdx == -1 { - // No more thinking tags, add remaining as text - if strings.TrimSpace(remaining) != "" { - blocks = append(blocks, map[string]interface{}{ - "type": "text", - "text": remaining, - }) - } - break - } - - // Add text before thinking tag (if any meaningful content) - if startIdx > 0 { - textBefore := remaining[:startIdx] - if strings.TrimSpace(textBefore) != "" { - blocks = append(blocks, map[string]interface{}{ - "type": "text", - "text": textBefore, - }) - } - } - - // Move past the opening tag - remaining = remaining[startIdx+len(thinkingStartTag):] - - // Find closing tag - endIdx := strings.Index(remaining, thinkingEndTag) - - if endIdx == -1 { - // No closing tag found, treat rest as thinking content (incomplete response) - if strings.TrimSpace(remaining) != "" { - blocks = append(blocks, map[string]interface{}{ - "type": "thinking", - "thinking": remaining, - }) - log.Warnf("kiro: extractThinkingFromContent - missing closing tag") - } - break - } - - // Extract thinking content between tags - thinkContent := 
remaining[:endIdx] - if strings.TrimSpace(thinkContent) != "" { - blocks = append(blocks, map[string]interface{}{ - "type": "thinking", - "thinking": thinkContent, - }) - log.Debugf("kiro: extractThinkingFromContent - extracted thinking block (len: %d)", len(thinkContent)) - } - - // Move past the closing tag - remaining = remaining[endIdx+len(thinkingEndTag):] - } - - // If no blocks were created (all whitespace), return empty text block - if len(blocks) == 0 { - blocks = append(blocks, map[string]interface{}{ - "type": "text", - "text": "", - }) - } - - return blocks -} - -// NOTE: Tool uses are now extracted from API response, not parsed from text +// NOTE: Response building functions moved to internal/translator/kiro/claude/kiro_claude_response.go +// The executor now uses kiroclaude.BuildClaudeResponse() and kiroclaude.ExtractThinkingFromContent() instead // streamToChannel converts AWS Event Stream to channel-based streaming. // Supports tool calling - emits tool_use content blocks when tools are used. 
@@ -2155,12 +1360,12 @@ func (e *KiroExecutor) extractThinkingFromContent(content string) []map[string]i func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter) { reader := bufio.NewReaderSize(body, 20*1024*1024) // 20MB buffer to match other providers var totalUsage usage.Detail - var hasToolUses bool // Track if any tool uses were emitted - var upstreamStopReason string // Track stop_reason from upstream events + var hasToolUses bool // Track if any tool uses were emitted + var upstreamStopReason string // Track stop_reason from upstream events // Tool use state tracking for input buffering and deduplication processedIDs := make(map[string]bool) - var currentToolUse *toolUseState + var currentToolUse *kiroclaude.ToolUseState // NOTE: Duplicate content filtering removed - it was causing legitimate repeated // content (like consecutive newlines) to be incorrectly filtered out. 
@@ -2185,17 +1390,17 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // Thinking mode state tracking - based on amq2api implementation // Tracks whether we're inside a thinking block and handles partial tags inThinkBlock := false - pendingStartTagChars := 0 // Number of chars that might be start of the thinking start tag - pendingEndTagChars := 0 // Number of chars that might be start of the thinking end tag - isThinkingBlockOpen := false // Track if thinking content block is open - thinkingBlockIndex := -1 // Index of the thinking content block + pendingStartTagChars := 0 // Number of chars that might be start of the thinking start tag + pendingEndTagChars := 0 // Number of chars that might be start of the thinking end tag + isThinkingBlockOpen := false // Track if thinking content block is open + thinkingBlockIndex := -1 // Index of the thinking content block // Pre-calculate input tokens from request if possible // Kiro uses Claude format, so try Claude format first, then OpenAI format, then fallback if enc, err := getTokenizer(model); err == nil { var inputTokens int64 var countMethod string - + // Try Claude format first (Kiro uses Claude API format) if inp, err := countClaudeChatTokens(enc, claudeBody); err == nil && inp > 0 { inputTokens = inp @@ -2212,7 +1417,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } countMethod = "estimate" } - + totalUsage.InputTokens = inputTokens log.Debugf("kiro: streamToChannel pre-calculated input tokens: %d (method: %s, claude body: %d bytes, original req: %d bytes)", totalUsage.InputTokens, countMethod, len(claudeBody), len(originalReq)) @@ -2239,7 +1444,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out if eventErr != nil { // Log the error log.Errorf("kiro: streamToChannel error: %v", eventErr) - + // Send error to channel for client notification out <- cliproxyexecutor.StreamChunk{Err: eventErr} return @@ -2247,71 +1452,71 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out if msg
== nil { // Normal end of stream (EOF) // Flush any incomplete tool use before ending stream - if currentToolUse != nil && !processedIDs[currentToolUse.toolUseID] { - log.Warnf("kiro: flushing incomplete tool use at EOF: %s (ID: %s)", currentToolUse.name, currentToolUse.toolUseID) - fullInput := currentToolUse.inputBuffer.String() - repairedJSON := repairJSON(fullInput) + if currentToolUse != nil && !processedIDs[currentToolUse.ToolUseID] { + log.Warnf("kiro: flushing incomplete tool use at EOF: %s (ID: %s)", currentToolUse.Name, currentToolUse.ToolUseID) + fullInput := currentToolUse.InputBuffer.String() + repairedJSON := kiroclaude.RepairJSON(fullInput) var finalInput map[string]interface{} if err := json.Unmarshal([]byte(repairedJSON), &finalInput); err != nil { log.Warnf("kiro: failed to parse incomplete tool input at EOF: %v", err) finalInput = make(map[string]interface{}) } - - processedIDs[currentToolUse.toolUseID] = true + + processedIDs[currentToolUse.ToolUseID] = true contentBlockIndex++ - + // Send tool_use content block - blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", currentToolUse.toolUseID, currentToolUse.name) + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", currentToolUse.ToolUseID, currentToolUse.Name) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) for _, chunk := range sseData { if chunk != "" { out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} } } - + // Send tool input as delta inputBytes, _ := json.Marshal(finalInput) - inputDelta := e.buildClaudeInputJsonDeltaEvent(string(inputBytes), contentBlockIndex) + inputDelta := kiroclaude.BuildClaudeInputJsonDeltaEvent(string(inputBytes), contentBlockIndex) sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, inputDelta, 
&translatorParam) for _, chunk := range sseData { if chunk != "" { out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} } } - + // Close block - blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex) + blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex) sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam) for _, chunk := range sseData { if chunk != "" { out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} } } - + hasToolUses = true currentToolUse = nil } - + // Flush any pending tag characters at EOF // These are partial tag prefixes that were held back waiting for more data // Since no more data is coming, output them as regular text var pendingText string if pendingStartTagChars > 0 { - pendingText = thinkingStartTag[:pendingStartTagChars] + pendingText = kirocommon.ThinkingStartTag[:pendingStartTagChars] log.Debugf("kiro: flushing pending start tag chars at EOF: %q", pendingText) pendingStartTagChars = 0 } if pendingEndTagChars > 0 { - pendingText += thinkingEndTag[:pendingEndTagChars] + pendingText += kirocommon.ThinkingEndTag[:pendingEndTagChars] log.Debugf("kiro: flushing pending end tag chars at EOF: %q", pendingText) pendingEndTagChars = 0 } - + // Output pending text if any if pendingText != "" { // If we're in a thinking block, output as thinking content if inThinkBlock && isThinkingBlockOpen { - thinkingEvent := e.buildClaudeThinkingDeltaEvent(pendingText, thinkingBlockIndex) + thinkingEvent := kiroclaude.BuildClaudeThinkingDeltaEvent(pendingText, thinkingBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, thinkingEvent, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2323,7 +1528,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out if 
!isTextBlockOpen { contentBlockIndex++ isTextBlockOpen = true - blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2331,8 +1536,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } } } - - claudeEvent := e.buildClaudeStreamEvent(pendingText, contentBlockIndex) + + claudeEvent := kiroclaude.BuildClaudeStreamEvent(pendingText, contentBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2386,18 +1591,18 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // Extract stop_reason from various event formats (streaming) // Kiro/Amazon Q API may include stop_reason in different locations - if sr := getString(event, "stop_reason"); sr != "" { + if sr := kirocommon.GetString(event, "stop_reason"); sr != "" { upstreamStopReason = sr log.Debugf("kiro: streamToChannel found stop_reason (top-level): %s", upstreamStopReason) } - if sr := getString(event, "stopReason"); sr != "" { + if sr := kirocommon.GetString(event, "stopReason"); sr != "" { upstreamStopReason = sr log.Debugf("kiro: streamToChannel found stopReason (top-level): %s", upstreamStopReason) } // Send message_start on first event if !messageStartSent { - msgStart := e.buildClaudeMessageStartEvent(model, totalUsage.InputTokens) + msgStart := kiroclaude.BuildClaudeMessageStartEvent(model, totalUsage.InputTokens) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStart, 
&translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2415,11 +1620,11 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out case "messageStopEvent", "message_stop": // Handle message stop events which may contain stop_reason - if sr := getString(event, "stop_reason"); sr != "" { + if sr := kirocommon.GetString(event, "stop_reason"); sr != "" { upstreamStopReason = sr log.Debugf("kiro: streamToChannel found stop_reason in messageStopEvent: %s", upstreamStopReason) } - if sr := getString(event, "stopReason"); sr != "" { + if sr := kirocommon.GetString(event, "stopReason"); sr != "" { upstreamStopReason = sr log.Debugf("kiro: streamToChannel found stopReason in messageStopEvent: %s", upstreamStopReason) } @@ -2427,17 +1632,17 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out case "assistantResponseEvent": var contentDelta string var toolUses []map[string]interface{} - + if assistantResp, ok := event["assistantResponseEvent"].(map[string]interface{}); ok { if c, ok := assistantResp["content"].(string); ok { contentDelta = c } // Extract stop_reason from assistantResponseEvent - if sr := getString(assistantResp, "stop_reason"); sr != "" { + if sr := kirocommon.GetString(assistantResp, "stop_reason"); sr != "" { upstreamStopReason = sr log.Debugf("kiro: streamToChannel found stop_reason in assistantResponseEvent: %s", upstreamStopReason) } - if sr := getString(assistantResp, "stopReason"); sr != "" { + if sr := kirocommon.GetString(assistantResp, "stopReason"); sr != "" { upstreamStopReason = sr log.Debugf("kiro: streamToChannel found stopReason in assistantResponseEvent: %s", upstreamStopReason) } @@ -2473,7 +1678,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out outputLen += len(contentDelta) // Accumulate content for streaming token calculation accumulatedContent.WriteString(contentDelta) - + // Real-time usage estimation: Check if we should send a usage 
update // This helps clients track context usage during long thinking sessions shouldSendUsageUpdate := false @@ -2482,7 +1687,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } else if time.Since(lastUsageUpdateTime) >= usageUpdateTimeInterval && accumulatedContent.Len() > lastUsageUpdateLen { shouldSendUsageUpdate = true } - + if shouldSendUsageUpdate { // Calculate current output tokens using tiktoken var currentOutputTokens int64 @@ -2498,24 +1703,24 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out currentOutputTokens = 1 } } - + // Only send update if token count has changed significantly (at least 10 tokens) if currentOutputTokens > lastReportedOutputTokens+10 { // Send ping event with usage information // This is a non-blocking update that clients can optionally process - pingEvent := e.buildClaudePingEventWithUsage(totalUsage.InputTokens, currentOutputTokens) + pingEvent := kiroclaude.BuildClaudePingEventWithUsage(totalUsage.InputTokens, currentOutputTokens) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, pingEvent, &translatorParam) for _, chunk := range sseData { if chunk != "" { out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} } } - + lastReportedOutputTokens = currentOutputTokens log.Debugf("kiro: sent real-time usage update - input: %d, output: %d (accumulated: %d chars)", totalUsage.InputTokens, currentOutputTokens, accumulatedContent.Len()) } - + lastUsageUpdateLen = accumulatedContent.Len() lastUsageUpdateTime = time.Now() } @@ -2526,20 +1731,20 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // If we have pending start tag chars from previous chunk, prepend them if pendingStartTagChars > 0 { - remaining = thinkingStartTag[:pendingStartTagChars] + remaining + remaining = kirocommon.ThinkingStartTag[:pendingStartTagChars] + remaining 
pendingStartTagChars = 0 } - + // If we have pending end tag chars from previous chunk, prepend them if pendingEndTagChars > 0 { - remaining = thinkingEndTag[:pendingEndTagChars] + remaining + remaining = kirocommon.ThinkingEndTag[:pendingEndTagChars] + remaining pendingEndTagChars = 0 } for len(remaining) > 0 { if inThinkBlock { // Inside thinking block - look for end tag - endIdx := strings.Index(remaining, thinkingEndTag) + endIdx := strings.Index(remaining, kirocommon.ThinkingEndTag) if endIdx >= 0 { // Found end tag - emit any content before end tag, then close block thinkContent := remaining[:endIdx] @@ -2550,7 +1755,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out contentBlockIndex++ thinkingBlockIndex = contentBlockIndex isThinkingBlockOpen = true - blockStart := e.buildClaudeContentBlockStartEvent(thinkingBlockIndex, "thinking", "", "") + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(thinkingBlockIndex, "thinking", "", "") sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2558,9 +1763,9 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } } } - + // Send thinking delta immediately - thinkingEvent := e.buildClaudeThinkingDeltaEvent(thinkContent, thinkingBlockIndex) + thinkingEvent := kiroclaude.BuildClaudeThinkingDeltaEvent(thinkContent, thinkingBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, thinkingEvent, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2574,7 +1779,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // Close thinking block if isThinkingBlockOpen { - blockStop := e.buildClaudeContentBlockStopEvent(thinkingBlockIndex) + blockStop := 
kiroclaude.BuildClaudeContentBlockStopEvent(thinkingBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2585,13 +1790,13 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } inThinkBlock = false - remaining = remaining[endIdx+len(thinkingEndTag):] + remaining = remaining[endIdx+len(kirocommon.ThinkingEndTag):] log.Debugf("kiro: exited thinking block") } else { // No end tag found - TRUE STREAMING: emit content immediately // Only save potential partial tag length for next iteration - pendingEnd := pendingTagSuffix(remaining, thinkingEndTag) - + pendingEnd := kiroclaude.PendingTagSuffix(remaining, kirocommon.ThinkingEndTag) + // Calculate content to emit immediately (excluding potential partial tag) var contentToEmit string if pendingEnd > 0 { @@ -2601,7 +1806,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } else { contentToEmit = remaining } - + // TRUE STREAMING: Emit thinking content immediately if contentToEmit != "" { // Start thinking block if not open @@ -2609,39 +1814,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out contentBlockIndex++ thinkingBlockIndex = contentBlockIndex isThinkingBlockOpen = true - blockStart := e.buildClaudeContentBlockStartEvent(thinkingBlockIndex, "thinking", "", "") - sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) - for _, chunk := range sseData { - if chunk != "" { - out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} - } - } - } - - // Send thinking delta immediately - TRUE STREAMING! 
- thinkingEvent := e.buildClaudeThinkingDeltaEvent(contentToEmit, thinkingBlockIndex) - sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, thinkingEvent, &translatorParam) - for _, chunk := range sseData { - if chunk != "" { - out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} - } - } - } - - remaining = "" - } - } else { - // Outside thinking block - look for start tag - startIdx := strings.Index(remaining, thinkingStartTag) - if startIdx >= 0 { - // Found start tag - emit text before it and switch to thinking mode - textBefore := remaining[:startIdx] - if textBefore != "" { - // Start text content block if needed - if !isTextBlockOpen { - contentBlockIndex++ - isTextBlockOpen = true - blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(thinkingBlockIndex, "thinking", "", "") sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2650,7 +1823,39 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } } - claudeEvent := e.buildClaudeStreamEvent(textBefore, contentBlockIndex) + // Send thinking delta immediately - TRUE STREAMING! 
+ thinkingEvent := kiroclaude.BuildClaudeThinkingDeltaEvent(contentToEmit, thinkingBlockIndex) + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, thinkingEvent, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + + remaining = "" + } + } else { + // Outside thinking block - look for start tag + startIdx := strings.Index(remaining, kirocommon.ThinkingStartTag) + if startIdx >= 0 { + // Found start tag - emit text before it and switch to thinking mode + textBefore := remaining[:startIdx] + if textBefore != "" { + // Start text content block if needed + if !isTextBlockOpen { + contentBlockIndex++ + isTextBlockOpen = true + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) + for _, chunk := range sseData { + if chunk != "" { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} + } + } + } + + claudeEvent := kiroclaude.BuildClaudeStreamEvent(textBefore, contentBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2661,7 +1866,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // Close text block before starting thinking block if isTextBlockOpen { - blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex) + blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam) for _, chunk := range sseData { if 
chunk != "" { @@ -2672,11 +1877,11 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } inThinkBlock = true - remaining = remaining[startIdx+len(thinkingStartTag):] + remaining = remaining[startIdx+len(kirocommon.ThinkingStartTag):] log.Debugf("kiro: entered thinking block") } else { // No start tag found - check for partial start tag at buffer end - pendingStart := pendingTagSuffix(remaining, thinkingStartTag) + pendingStart := kiroclaude.PendingTagSuffix(remaining, kirocommon.ThinkingStartTag) if pendingStart > 0 { // Emit text except potential partial tag textToEmit := remaining[:len(remaining)-pendingStart] @@ -2685,7 +1890,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out if !isTextBlockOpen { contentBlockIndex++ isTextBlockOpen = true - blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2694,7 +1899,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } } - claudeEvent := e.buildClaudeStreamEvent(textToEmit, contentBlockIndex) + claudeEvent := kiroclaude.BuildClaudeStreamEvent(textToEmit, contentBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2711,7 +1916,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out if !isTextBlockOpen { contentBlockIndex++ isTextBlockOpen = true - blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") + blockStart := 
kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2720,7 +1925,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } } - claudeEvent := e.buildClaudeStreamEvent(remaining, contentBlockIndex) + claudeEvent := kiroclaude.BuildClaudeStreamEvent(remaining, contentBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2734,22 +1939,22 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } } } - + // Handle tool uses in response (with deduplication) for _, tu := range toolUses { - toolUseID := getString(tu, "toolUseId") - + toolUseID := kirocommon.GetString(tu, "toolUseId") + // Check for duplicate if processedIDs[toolUseID] { log.Debugf("kiro: skipping duplicate tool use in stream: %s", toolUseID) continue } processedIDs[toolUseID] = true - + hasToolUses = true // Close text block if open before starting tool_use block if isTextBlockOpen && contentBlockIndex >= 0 { - blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex) + blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2758,19 +1963,19 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } isTextBlockOpen = false } - + // Emit tool_use content block contentBlockIndex++ - toolName := getString(tu, "name") - - blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, 
"tool_use", toolUseID, toolName) + toolName := kirocommon.GetString(tu, "name") + + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", toolUseID, toolName) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) for _, chunk := range sseData { if chunk != "" { out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} } } - + // Send input_json_delta with the tool input if input, ok := tu["input"].(map[string]interface{}); ok { inputJSON, err := json.Marshal(input) @@ -2778,7 +1983,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out log.Debugf("kiro: failed to marshal tool input: %v", err) // Don't continue - still need to close the block } else { - inputDelta := e.buildClaudeInputJsonDeltaEvent(string(inputJSON), contentBlockIndex) + inputDelta := kiroclaude.BuildClaudeInputJsonDeltaEvent(string(inputJSON), contentBlockIndex) sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, inputDelta, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2787,9 +1992,9 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } } } - + // Close tool_use block (always close even if input marshal failed) - blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex) + blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex) sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2800,16 +2005,16 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out case "toolUseEvent": // Handle dedicated tool use events with input buffering - completedToolUses, newState := e.processToolUseEvent(event, 
currentToolUse, processedIDs) + completedToolUses, newState := kiroclaude.ProcessToolUseEvent(event, currentToolUse, processedIDs) currentToolUse = newState - + // Emit completed tool uses for _, tu := range completedToolUses { hasToolUses = true - + // Close text block if open if isTextBlockOpen && contentBlockIndex >= 0 { - blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex) + blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2818,23 +2023,23 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } isTextBlockOpen = false } - + contentBlockIndex++ - - blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", tu.ToolUseID, tu.Name) + + blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", tu.ToolUseID, tu.Name) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) for _, chunk := range sseData { if chunk != "" { out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")} } } - + if tu.Input != nil { inputJSON, err := json.Marshal(tu.Input) if err != nil { log.Debugf("kiro: failed to marshal tool input in toolUseEvent: %v", err) } else { - inputDelta := e.buildClaudeInputJsonDeltaEvent(string(inputJSON), contentBlockIndex) + inputDelta := kiroclaude.BuildClaudeInputJsonDeltaEvent(string(inputJSON), contentBlockIndex) sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, inputDelta, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2843,8 +2048,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } } } - - 
blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex) + + blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex) sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2875,7 +2080,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // Close content block if open if isTextBlockOpen && contentBlockIndex >= 0 { - blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex) + blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2935,7 +2140,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } // Send message_delta event - msgDelta := e.buildClaudeMessageDeltaEvent(stopReason, totalUsage) + msgDelta := kiroclaude.BuildClaudeMessageDeltaEvent(stopReason, totalUsage) sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgDelta, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2944,7 +2149,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out } // Send message_stop event separately - msgStop := e.buildClaudeMessageStopOnlyEvent() + msgStop := kiroclaude.BuildClaudeMessageStopOnlyEvent() sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam) for _, chunk := range sseData { if chunk != "" { @@ -2954,180 +2159,8 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out // reporter.publish is called via defer } - -// Claude SSE event builders -// All 
builders return complete SSE format with "event:" line for Claude client compatibility. -func (e *KiroExecutor) buildClaudeMessageStartEvent(model string, inputTokens int64) []byte { - event := map[string]interface{}{ - "type": "message_start", - "message": map[string]interface{}{ - "id": "msg_" + uuid.New().String()[:24], - "type": "message", - "role": "assistant", - "content": []interface{}{}, - "model": model, - "stop_reason": nil, - "stop_sequence": nil, - "usage": map[string]interface{}{"input_tokens": inputTokens, "output_tokens": 0}, - }, - } - result, _ := json.Marshal(event) - return []byte("event: message_start\ndata: " + string(result)) -} - -func (e *KiroExecutor) buildClaudeContentBlockStartEvent(index int, blockType, toolUseID, toolName string) []byte { - var contentBlock map[string]interface{} - switch blockType { - case "tool_use": - contentBlock = map[string]interface{}{ - "type": "tool_use", - "id": toolUseID, - "name": toolName, - "input": map[string]interface{}{}, - } - case "thinking": - contentBlock = map[string]interface{}{ - "type": "thinking", - "thinking": "", - } - default: - contentBlock = map[string]interface{}{ - "type": "text", - "text": "", - } - } - - event := map[string]interface{}{ - "type": "content_block_start", - "index": index, - "content_block": contentBlock, - } - result, _ := json.Marshal(event) - return []byte("event: content_block_start\ndata: " + string(result)) -} - -func (e *KiroExecutor) buildClaudeStreamEvent(contentDelta string, index int) []byte { - event := map[string]interface{}{ - "type": "content_block_delta", - "index": index, - "delta": map[string]interface{}{ - "type": "text_delta", - "text": contentDelta, - }, - } - result, _ := json.Marshal(event) - return []byte("event: content_block_delta\ndata: " + string(result)) -} - -// buildClaudeInputJsonDeltaEvent creates an input_json_delta event for tool use streaming -func (e *KiroExecutor) buildClaudeInputJsonDeltaEvent(partialJSON string, index int) []byte { 
- event := map[string]interface{}{ - "type": "content_block_delta", - "index": index, - "delta": map[string]interface{}{ - "type": "input_json_delta", - "partial_json": partialJSON, - }, - } - result, _ := json.Marshal(event) - return []byte("event: content_block_delta\ndata: " + string(result)) -} - -func (e *KiroExecutor) buildClaudeContentBlockStopEvent(index int) []byte { - event := map[string]interface{}{ - "type": "content_block_stop", - "index": index, - } - result, _ := json.Marshal(event) - return []byte("event: content_block_stop\ndata: " + string(result)) -} - -// buildClaudeMessageDeltaEvent creates the message_delta event with stop_reason and usage. -func (e *KiroExecutor) buildClaudeMessageDeltaEvent(stopReason string, usageInfo usage.Detail) []byte { - deltaEvent := map[string]interface{}{ - "type": "message_delta", - "delta": map[string]interface{}{ - "stop_reason": stopReason, - "stop_sequence": nil, - }, - "usage": map[string]interface{}{ - "input_tokens": usageInfo.InputTokens, - "output_tokens": usageInfo.OutputTokens, - }, - } - deltaResult, _ := json.Marshal(deltaEvent) - return []byte("event: message_delta\ndata: " + string(deltaResult)) -} - -// buildClaudeMessageStopOnlyEvent creates only the message_stop event. -func (e *KiroExecutor) buildClaudeMessageStopOnlyEvent() []byte { - stopEvent := map[string]interface{}{ - "type": "message_stop", - } - stopResult, _ := json.Marshal(stopEvent) - return []byte("event: message_stop\ndata: " + string(stopResult)) -} - -// buildClaudeFinalEvent constructs the final Claude-style event. -func (e *KiroExecutor) buildClaudeFinalEvent() []byte { - event := map[string]interface{}{ - "type": "message_stop", - } - result, _ := json.Marshal(event) - return []byte("event: message_stop\ndata: " + string(result)) -} - -// buildClaudePingEventWithUsage creates a ping event with embedded usage information. -// This is used for real-time usage estimation during streaming. 
-// The usage field is a non-standard extension that clients can optionally process. -// Clients that don't recognize the usage field will simply ignore it. -func (e *KiroExecutor) buildClaudePingEventWithUsage(inputTokens, outputTokens int64) []byte { - event := map[string]interface{}{ - "type": "ping", - "usage": map[string]interface{}{ - "input_tokens": inputTokens, - "output_tokens": outputTokens, - "total_tokens": inputTokens + outputTokens, - "estimated": true, // Flag to indicate this is an estimate, not final - }, - } - result, _ := json.Marshal(event) - return []byte("event: ping\ndata: " + string(result)) -} - -// buildClaudeThinkingDeltaEvent creates a thinking_delta event for Claude API compatibility. -// This is used when streaming thinking content wrapped in tags. -func (e *KiroExecutor) buildClaudeThinkingDeltaEvent(thinkingDelta string, index int) []byte { - event := map[string]interface{}{ - "type": "content_block_delta", - "index": index, - "delta": map[string]interface{}{ - "type": "thinking_delta", - "thinking": thinkingDelta, - }, - } - result, _ := json.Marshal(event) - return []byte("event: content_block_delta\ndata: " + string(result)) -} - -// pendingTagSuffix detects if the buffer ends with a partial prefix of the given tag. -// Returns the length of the partial match (0 if no match). -// Based on amq2api implementation for handling cross-chunk tag boundaries. -func pendingTagSuffix(buffer, tag string) int { - if buffer == "" || tag == "" { - return 0 - } - maxLen := len(buffer) - if maxLen > len(tag)-1 { - maxLen = len(tag) - 1 - } - for length := maxLen; length > 0; length-- { - if len(buffer) >= length && buffer[len(buffer)-length:] == tag[:length] { - return length - } - } - return 0 -} +// NOTE: Claude SSE event builders moved to internal/translator/kiro/claude/kiro_claude_stream.go +// The executor now uses kiroclaude.BuildClaude*Event() functions instead // CountTokens is not supported for Kiro provider. 
// Kiro/Amazon Q backend doesn't expose a token counting API. @@ -3281,209 +2314,6 @@ func (e *KiroExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c return updated, nil } -// streamEventStream converts AWS Event Stream to SSE (legacy method for gin.Context). -// Note: For full tool calling support, use streamToChannel instead. -func (e *KiroExecutor) streamEventStream(ctx context.Context, body io.Reader, c *gin.Context, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter) error { - reader := bufio.NewReader(body) - var totalUsage usage.Detail - - // Translator param for maintaining tool call state across streaming events - var translatorParam any - - // Pre-calculate input tokens from request if possible - if enc, err := getTokenizer(model); err == nil { - // Try OpenAI format first, then fall back to raw byte count estimation - if inp, err := countOpenAIChatTokens(enc, originalReq); err == nil && inp > 0 { - totalUsage.InputTokens = inp - } else { - // Fallback: estimate from raw request size (roughly 4 chars per token) - totalUsage.InputTokens = int64(len(originalReq) / 4) - if totalUsage.InputTokens == 0 && len(originalReq) > 0 { - totalUsage.InputTokens = 1 - } - } - log.Debugf("kiro: streamEventStream pre-calculated input tokens: %d (request size: %d bytes)", totalUsage.InputTokens, len(originalReq)) - } - - contentBlockIndex := -1 - messageStartSent := false - isBlockOpen := false - var outputLen int - - for { - select { - case <-ctx.Done(): - return ctx.Err() - default: - } - - prelude := make([]byte, 8) - _, err := io.ReadFull(reader, prelude) - if err == io.EOF { - break - } - if err != nil { - return fmt.Errorf("failed to read prelude: %w", err) - } - - totalLen := binary.BigEndian.Uint32(prelude[0:4]) - if totalLen < 8 { - return fmt.Errorf("invalid message length: %d", totalLen) - } - if totalLen > kiroMaxMessageSize { - return fmt.Errorf("message too large: %d bytes", totalLen) - } 
- headersLen := binary.BigEndian.Uint32(prelude[4:8]) - - remaining := make([]byte, totalLen-8) - _, err = io.ReadFull(reader, remaining) - if err != nil { - return fmt.Errorf("failed to read message: %w", err) - } - - // Validate headersLen to prevent slice out of bounds - if headersLen+4 > uint32(len(remaining)) { - log.Warnf("kiro: invalid headersLen %d exceeds remaining buffer %d", headersLen, len(remaining)) - continue - } - - eventType := e.extractEventType(remaining[:headersLen+4]) - - payloadStart := 4 + headersLen - payloadEnd := uint32(len(remaining)) - 4 - if payloadStart >= payloadEnd { - continue - } - - payload := remaining[payloadStart:payloadEnd] - appendAPIResponseChunk(ctx, e.cfg, payload) - - var event map[string]interface{} - if err := json.Unmarshal(payload, &event); err != nil { - log.Warnf("kiro: failed to unmarshal event payload: %v, raw: %s", err, string(payload)) - continue - } - - if !messageStartSent { - msgStart := e.buildClaudeMessageStartEvent(model, totalUsage.InputTokens) - sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStart, &translatorParam) - for _, chunk := range sseData { - if chunk != "" { - c.Writer.Write([]byte(chunk + "\n\n")) - } - } - c.Writer.Flush() - messageStartSent = true - } - - switch eventType { - case "assistantResponseEvent": - var contentDelta string - if assistantResp, ok := event["assistantResponseEvent"].(map[string]interface{}); ok { - if ct, ok := assistantResp["content"].(string); ok { - contentDelta = ct - } - } - if contentDelta == "" { - if ct, ok := event["content"].(string); ok { - contentDelta = ct - } - } - - if contentDelta != "" { - outputLen += len(contentDelta) - // Start text content block if needed - if !isBlockOpen { - contentBlockIndex++ - isBlockOpen = true - blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "text", "", "") - sseData := sdktranslator.TranslateStream(ctx, 
sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam) - for _, chunk := range sseData { - if chunk != "" { - c.Writer.Write([]byte(chunk + "\n\n")) - } - } - c.Writer.Flush() - } - - claudeEvent := e.buildClaudeStreamEvent(contentDelta, contentBlockIndex) - sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, claudeEvent, &translatorParam) - for _, chunk := range sseData { - if chunk != "" { - c.Writer.Write([]byte(chunk + "\n\n")) - } - } - c.Writer.Flush() - } - - // Note: For full toolUseEvent support, use streamToChannel - - case "supplementaryWebLinksEvent": - if inputTokens, ok := event["inputTokens"].(float64); ok { - totalUsage.InputTokens = int64(inputTokens) - } - if outputTokens, ok := event["outputTokens"].(float64); ok { - totalUsage.OutputTokens = int64(outputTokens) - } - } - - if usageEvent, ok := event["supplementaryWebLinksEvent"].(map[string]interface{}); ok { - if inputTokens, ok := usageEvent["inputTokens"].(float64); ok { - totalUsage.InputTokens = int64(inputTokens) - } - if outputTokens, ok := usageEvent["outputTokens"].(float64); ok { - totalUsage.OutputTokens = int64(outputTokens) - } - } - } - - // Close content block if open - if isBlockOpen && contentBlockIndex >= 0 { - blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex) - sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam) - for _, chunk := range sseData { - if chunk != "" { - c.Writer.Write([]byte(chunk + "\n\n")) - } - } - c.Writer.Flush() - } - - // Fallback for output tokens if not received from upstream - if totalUsage.OutputTokens == 0 && outputLen > 0 { - totalUsage.OutputTokens = int64(outputLen / 4) - if totalUsage.OutputTokens == 0 { - totalUsage.OutputTokens = 1 - } - } - totalUsage.TotalTokens = totalUsage.InputTokens + 
totalUsage.OutputTokens - - // Send message_delta event - msgDelta := e.buildClaudeMessageDeltaEvent("end_turn", totalUsage) - sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgDelta, &translatorParam) - for _, chunk := range sseData { - if chunk != "" { - c.Writer.Write([]byte(chunk + "\n\n")) - } - } - c.Writer.Flush() - - // Send message_stop event separately - msgStop := e.buildClaudeMessageStopOnlyEvent() - sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam) - for _, chunk := range sseData { - if chunk != "" { - c.Writer.Write([]byte(chunk + "\n\n")) - } - } - - c.Writer.Write([]byte("data: [DONE]\n\n")) - c.Writer.Flush() - - reporter.publish(ctx, totalUsage) - return nil -} - // isTokenExpired checks if a JWT access token has expired. // Returns true if the token is expired or cannot be parsed. func (e *KiroExecutor) isTokenExpired(accessToken string) bool { @@ -3542,666 +2372,6 @@ func (e *KiroExecutor) isTokenExpired(accessToken string) bool { return isExpired } -// ============================================================================ -// Message Merging Support - Merge adjacent messages with the same role -// Based on AIClient-2-API implementation -// ============================================================================ - -// mergeAdjacentMessages merges adjacent messages with the same role. -// This reduces API call complexity and improves compatibility. -// Based on AIClient-2-API implementation. 
-func mergeAdjacentMessages(messages []gjson.Result) []gjson.Result { - if len(messages) <= 1 { - return messages - } - - var merged []gjson.Result - for _, msg := range messages { - if len(merged) == 0 { - merged = append(merged, msg) - continue - } - - lastMsg := merged[len(merged)-1] - currentRole := msg.Get("role").String() - lastRole := lastMsg.Get("role").String() - - if currentRole == lastRole { - // Merge content from current message into last message - mergedContent := mergeMessageContent(lastMsg, msg) - // Create a new merged message JSON - mergedMsg := createMergedMessage(lastRole, mergedContent) - merged[len(merged)-1] = gjson.Parse(mergedMsg) - } else { - merged = append(merged, msg) - } - } - - return merged -} - -// mergeMessageContent merges the content of two messages with the same role. -// Handles both string content and array content (with text, tool_use, tool_result blocks). -func mergeMessageContent(msg1, msg2 gjson.Result) string { - content1 := msg1.Get("content") - content2 := msg2.Get("content") - - // Extract content blocks from both messages - var blocks1, blocks2 []map[string]interface{} - - if content1.IsArray() { - for _, block := range content1.Array() { - blocks1 = append(blocks1, blockToMap(block)) - } - } else if content1.Type == gjson.String { - blocks1 = append(blocks1, map[string]interface{}{ - "type": "text", - "text": content1.String(), - }) - } - - if content2.IsArray() { - for _, block := range content2.Array() { - blocks2 = append(blocks2, blockToMap(block)) - } - } else if content2.Type == gjson.String { - blocks2 = append(blocks2, map[string]interface{}{ - "type": "text", - "text": content2.String(), - }) - } - - // Merge text blocks if both end/start with text - if len(blocks1) > 0 && len(blocks2) > 0 { - if blocks1[len(blocks1)-1]["type"] == "text" && blocks2[0]["type"] == "text" { - // Merge the last text block of msg1 with the first text block of msg2 - text1 := blocks1[len(blocks1)-1]["text"].(string) - text2 := 
blocks2[0]["text"].(string) - blocks1[len(blocks1)-1]["text"] = text1 + "\n" + text2 - blocks2 = blocks2[1:] // Remove the merged block from blocks2 - } - } - - // Combine all blocks - allBlocks := append(blocks1, blocks2...) - - // Convert to JSON - result, _ := json.Marshal(allBlocks) - return string(result) -} - -// blockToMap converts a gjson.Result block to a map[string]interface{} -func blockToMap(block gjson.Result) map[string]interface{} { - result := make(map[string]interface{}) - block.ForEach(func(key, value gjson.Result) bool { - if value.IsObject() { - result[key.String()] = blockToMap(value) - } else if value.IsArray() { - var arr []interface{} - for _, item := range value.Array() { - if item.IsObject() { - arr = append(arr, blockToMap(item)) - } else { - arr = append(arr, item.Value()) - } - } - result[key.String()] = arr - } else { - result[key.String()] = value.Value() - } - return true - }) - return result -} - -// createMergedMessage creates a JSON string for a merged message -func createMergedMessage(role string, content string) string { - msg := map[string]interface{}{ - "role": role, - "content": json.RawMessage(content), - } - result, _ := json.Marshal(msg) - return string(result) -} - -// ============================================================================ -// Tool Calling Support - Embedded tool call parsing and input buffering -// Based on amq2api and AIClient-2-API implementations -// ============================================================================ - -// toolUseState tracks the state of an in-progress tool use during streaming. 
-type toolUseState struct { - toolUseID string - name string - inputBuffer strings.Builder - isComplete bool -} - -// Pre-compiled regex patterns for performance (avoid recompilation on each call) -var ( - // embeddedToolCallPattern matches [Called tool_name with args: {...}] format - // This pattern is used by Kiro when it embeds tool calls in text content - embeddedToolCallPattern = regexp.MustCompile(`\[Called\s+(\w+)\s+with\s+args:\s*`) - // whitespaceCollapsePattern collapses multiple whitespace characters into single space - whitespaceCollapsePattern = regexp.MustCompile(`\s+`) - // trailingCommaPattern matches trailing commas before closing braces/brackets - trailingCommaPattern = regexp.MustCompile(`,\s*([}\]])`) -) - -// parseEmbeddedToolCalls extracts [Called tool_name with args: {...}] format from text. -// Kiro sometimes embeds tool calls in text content instead of using toolUseEvent. -// Returns the cleaned text (with tool calls removed) and extracted tool uses. -func (e *KiroExecutor) parseEmbeddedToolCalls(text string, processedIDs map[string]bool) (string, []kiroToolUse) { - if !strings.Contains(text, "[Called") { - return text, nil - } - - var toolUses []kiroToolUse - cleanText := text - - // Find all [Called markers - matches := embeddedToolCallPattern.FindAllStringSubmatchIndex(text, -1) - if len(matches) == 0 { - return text, nil - } - - // Process matches in reverse order to maintain correct indices - for i := len(matches) - 1; i >= 0; i-- { - matchStart := matches[i][0] - toolNameStart := matches[i][2] - toolNameEnd := matches[i][3] - - if toolNameStart < 0 || toolNameEnd < 0 { - continue - } - - toolName := text[toolNameStart:toolNameEnd] - - // Find the JSON object start (after "with args:") - jsonStart := matches[i][1] - if jsonStart >= len(text) { - continue - } - - // Skip whitespace to find the opening brace - for jsonStart < len(text) && (text[jsonStart] == ' ' || text[jsonStart] == '\t') { - jsonStart++ - } - - if jsonStart >= 
len(text) || text[jsonStart] != '{' { - continue - } - - // Find matching closing bracket - jsonEnd := findMatchingBracket(text, jsonStart) - if jsonEnd < 0 { - continue - } - - // Extract JSON and find the closing bracket of [Called ...] - jsonStr := text[jsonStart : jsonEnd+1] - - // Find the closing ] after the JSON - closingBracket := jsonEnd + 1 - for closingBracket < len(text) && text[closingBracket] != ']' { - closingBracket++ - } - if closingBracket >= len(text) { - continue - } - - // Extract and repair the full tool call text - fullMatch := text[matchStart : closingBracket+1] - - // Repair and parse JSON - repairedJSON := repairJSON(jsonStr) - var inputMap map[string]interface{} - if err := json.Unmarshal([]byte(repairedJSON), &inputMap); err != nil { - log.Debugf("kiro: failed to parse embedded tool call JSON: %v, raw: %s", err, jsonStr) - continue - } - - // Generate unique tool ID - toolUseID := "toolu_" + uuid.New().String()[:12] - - // Check for duplicates using name+input as key - dedupeKey := toolName + ":" + repairedJSON - if processedIDs != nil { - if processedIDs[dedupeKey] { - log.Debugf("kiro: skipping duplicate embedded tool call: %s", toolName) - // Still remove from text even if duplicate - cleanText = strings.Replace(cleanText, fullMatch, "", 1) - continue - } - processedIDs[dedupeKey] = true - } - - toolUses = append(toolUses, kiroToolUse{ - ToolUseID: toolUseID, - Name: toolName, - Input: inputMap, - }) - - log.Infof("kiro: extracted embedded tool call: %s (ID: %s)", toolName, toolUseID) - - // Remove from clean text - cleanText = strings.Replace(cleanText, fullMatch, "", 1) - } - - // Clean up extra whitespace - cleanText = strings.TrimSpace(cleanText) - cleanText = whitespaceCollapsePattern.ReplaceAllString(cleanText, " ") - - return cleanText, toolUses -} - -// findMatchingBracket finds the index of the closing brace/bracket that matches -// the opening one at startPos. Handles nested objects and strings correctly. 
-func findMatchingBracket(text string, startPos int) int { - if startPos >= len(text) { - return -1 - } - - openChar := text[startPos] - var closeChar byte - switch openChar { - case '{': - closeChar = '}' - case '[': - closeChar = ']' - default: - return -1 - } - - depth := 1 - inString := false - escapeNext := false - - for i := startPos + 1; i < len(text); i++ { - char := text[i] - - if escapeNext { - escapeNext = false - continue - } - - if char == '\\' && inString { - escapeNext = true - continue - } - - if char == '"' { - inString = !inString - continue - } - - if !inString { - if char == openChar { - depth++ - } else if char == closeChar { - depth-- - if depth == 0 { - return i - } - } - } - } - - return -1 -} - -// repairJSON attempts to fix common JSON issues that may occur in tool call arguments. -// Based on AIClient-2-API's JSON repair implementation with a more conservative strategy. -// -// Conservative repair strategy: -// 1. First try to parse JSON directly - if valid, return as-is -// 2. Only attempt repair if parsing fails -// 3. After repair, validate the result - if still invalid, return original -// -// Handles incomplete JSON by balancing brackets and removing trailing incomplete content. -// Uses pre-compiled regex patterns for performance. 
-func repairJSON(jsonString string) string { - // Handle empty or invalid input - if jsonString == "" { - return "{}" - } - - str := strings.TrimSpace(jsonString) - if str == "" { - return "{}" - } - - // CONSERVATIVE STRATEGY: First try to parse directly - // If the JSON is already valid, return it unchanged - var testParse interface{} - if err := json.Unmarshal([]byte(str), &testParse); err == nil { - log.Debugf("kiro: repairJSON - JSON is already valid, returning unchanged") - return str - } - - log.Debugf("kiro: repairJSON - JSON parse failed, attempting repair") - originalStr := str // Keep original for fallback - - // First, escape unescaped newlines/tabs within JSON string values - str = escapeNewlinesInStrings(str) - // Remove trailing commas before closing braces/brackets - str = trailingCommaPattern.ReplaceAllString(str, "$1") - - // Calculate bracket balance to detect incomplete JSON - braceCount := 0 // {} balance - bracketCount := 0 // [] balance - inString := false - escape := false - lastValidIndex := -1 - - for i := 0; i < len(str); i++ { - char := str[i] - - // Handle escape sequences - if escape { - escape = false - continue - } - - if char == '\\' { - escape = true - continue - } - - // Handle string boundaries - if char == '"' { - inString = !inString - continue - } - - // Skip characters inside strings (they don't affect bracket balance) - if inString { - continue - } - - // Track bracket balance - switch char { - case '{': - braceCount++ - case '}': - braceCount-- - case '[': - bracketCount++ - case ']': - bracketCount-- - } - - // Record last valid position (where brackets are balanced or positive) - if braceCount >= 0 && bracketCount >= 0 { - lastValidIndex = i - } - } - - // If brackets are unbalanced, try to repair - if braceCount > 0 || bracketCount > 0 { - // Truncate to last valid position if we have incomplete content - if lastValidIndex > 0 && lastValidIndex < len(str)-1 { - // Check if truncation would help (only truncate if there's 
trailing garbage) - truncated := str[:lastValidIndex+1] - // Recount brackets after truncation - braceCount = 0 - bracketCount = 0 - inString = false - escape = false - for i := 0; i < len(truncated); i++ { - char := truncated[i] - if escape { - escape = false - continue - } - if char == '\\' { - escape = true - continue - } - if char == '"' { - inString = !inString - continue - } - if inString { - continue - } - switch char { - case '{': - braceCount++ - case '}': - braceCount-- - case '[': - bracketCount++ - case ']': - bracketCount-- - } - } - str = truncated - } - - // Add missing closing brackets - for braceCount > 0 { - str += "}" - braceCount-- - } - for bracketCount > 0 { - str += "]" - bracketCount-- - } - } - - // CONSERVATIVE STRATEGY: Validate repaired JSON - // If repair didn't produce valid JSON, return original string - if err := json.Unmarshal([]byte(str), &testParse); err != nil { - log.Warnf("kiro: repairJSON - repair failed to produce valid JSON, returning original") - return originalStr - } - - log.Debugf("kiro: repairJSON - successfully repaired JSON") - return str -} - -// escapeNewlinesInStrings escapes literal newlines, tabs, and other control characters -// that appear inside JSON string values. This handles cases where streaming fragments -// contain unescaped control characters within string content. 
-func escapeNewlinesInStrings(raw string) string { - var result strings.Builder - result.Grow(len(raw) + 100) // Pre-allocate with some extra space - - inString := false - escaped := false - - for i := 0; i < len(raw); i++ { - c := raw[i] - - if escaped { - // Previous character was backslash, this is an escape sequence - result.WriteByte(c) - escaped = false - continue - } - - if c == '\\' && inString { - // Start of escape sequence - result.WriteByte(c) - escaped = true - continue - } - - if c == '"' { - // Toggle string state - inString = !inString - result.WriteByte(c) - continue - } - - if inString { - // Inside a string, escape control characters - switch c { - case '\n': - result.WriteString("\\n") - case '\r': - result.WriteString("\\r") - case '\t': - result.WriteString("\\t") - default: - result.WriteByte(c) - } - } else { - result.WriteByte(c) - } - } - - return result.String() -} - -// processToolUseEvent handles a toolUseEvent from the Kiro stream. -// It accumulates input fragments and emits tool_use blocks when complete. -// Returns events to emit and updated state. 
-func (e *KiroExecutor) processToolUseEvent(event map[string]interface{}, currentToolUse *toolUseState, processedIDs map[string]bool) ([]kiroToolUse, *toolUseState) { - var toolUses []kiroToolUse - - // Extract from nested toolUseEvent or direct format - tu := event - if nested, ok := event["toolUseEvent"].(map[string]interface{}); ok { - tu = nested - } - - toolUseID := getString(tu, "toolUseId") - toolName := getString(tu, "name") - isStop := false - if stop, ok := tu["stop"].(bool); ok { - isStop = stop - } - - // Get input - can be string (fragment) or object (complete) - var inputFragment string - var inputMap map[string]interface{} - - if inputRaw, ok := tu["input"]; ok { - switch v := inputRaw.(type) { - case string: - inputFragment = v - case map[string]interface{}: - inputMap = v - } - } - - // New tool use starting - if toolUseID != "" && toolName != "" { - if currentToolUse != nil && currentToolUse.toolUseID != toolUseID { - // New tool use arrived while another is in progress (interleaved events) - // This is unusual - log warning and complete the previous one - log.Warnf("kiro: interleaved tool use detected - new ID %s arrived while %s in progress, completing previous", - toolUseID, currentToolUse.toolUseID) - // Emit incomplete previous tool use - if !processedIDs[currentToolUse.toolUseID] { - incomplete := kiroToolUse{ - ToolUseID: currentToolUse.toolUseID, - Name: currentToolUse.name, - } - if currentToolUse.inputBuffer.Len() > 0 { - var input map[string]interface{} - if err := json.Unmarshal([]byte(currentToolUse.inputBuffer.String()), &input); err == nil { - incomplete.Input = input - } - } - toolUses = append(toolUses, incomplete) - processedIDs[currentToolUse.toolUseID] = true - } - currentToolUse = nil - } - - if currentToolUse == nil { - // Check for duplicate - if processedIDs != nil && processedIDs[toolUseID] { - log.Debugf("kiro: skipping duplicate toolUseEvent: %s", toolUseID) - return nil, nil - } - - currentToolUse = &toolUseState{ - 
toolUseID: toolUseID, - name: toolName, - } - log.Infof("kiro: starting new tool use: %s (ID: %s)", toolName, toolUseID) - } - } - - // Accumulate input fragments - if currentToolUse != nil && inputFragment != "" { - // Accumulate fragments directly - they form valid JSON when combined - // The fragments are already decoded from JSON, so we just concatenate them - currentToolUse.inputBuffer.WriteString(inputFragment) - log.Debugf("kiro: accumulated input fragment, total length: %d", currentToolUse.inputBuffer.Len()) - } - - // If complete input object provided directly - if currentToolUse != nil && inputMap != nil { - inputBytes, _ := json.Marshal(inputMap) - currentToolUse.inputBuffer.Reset() - currentToolUse.inputBuffer.Write(inputBytes) - } - - // Tool use complete - if isStop && currentToolUse != nil { - fullInput := currentToolUse.inputBuffer.String() - - // Repair and parse the accumulated JSON - repairedJSON := repairJSON(fullInput) - var finalInput map[string]interface{} - if err := json.Unmarshal([]byte(repairedJSON), &finalInput); err != nil { - log.Warnf("kiro: failed to parse accumulated tool input: %v, raw: %s", err, fullInput) - // Use empty input as fallback - finalInput = make(map[string]interface{}) - } - - toolUse := kiroToolUse{ - ToolUseID: currentToolUse.toolUseID, - Name: currentToolUse.name, - Input: finalInput, - } - toolUses = append(toolUses, toolUse) - - // Mark as processed - if processedIDs != nil { - processedIDs[currentToolUse.toolUseID] = true - } - - log.Infof("kiro: completed tool use: %s (ID: %s)", currentToolUse.name, currentToolUse.toolUseID) - return toolUses, nil // Reset state - } - - return toolUses, currentToolUse -} - -// deduplicateToolUses removes duplicate tool uses based on toolUseId and content (name+arguments). -// This prevents both ID-based duplicates and content-based duplicates (same tool call with different IDs). 
-func deduplicateToolUses(toolUses []kiroToolUse) []kiroToolUse { - seenIDs := make(map[string]bool) - seenContent := make(map[string]bool) // Content-based deduplication (name + arguments) - var unique []kiroToolUse - - for _, tu := range toolUses { - // Skip if we've already seen this ID - if seenIDs[tu.ToolUseID] { - log.Debugf("kiro: removing ID-duplicate tool use: %s (name: %s)", tu.ToolUseID, tu.Name) - continue - } - - // Build content key for content-based deduplication - inputJSON, _ := json.Marshal(tu.Input) - contentKey := tu.Name + ":" + string(inputJSON) - - // Skip if we've already seen this content (same name + arguments) - if seenContent[contentKey] { - log.Debugf("kiro: removing content-duplicate tool use: %s (id: %s)", tu.Name, tu.ToolUseID) - continue - } - - seenIDs[tu.ToolUseID] = true - seenContent[contentKey] = true - unique = append(unique, tu) - } - - return unique -} +// NOTE: Message merging functions moved to internal/translator/kiro/common/message_merge.go +// NOTE: Tool calling support functions moved to internal/translator/kiro/claude/kiro_claude_tools.go +// The executor now uses kiroclaude.* and kirocommon.* functions instead diff --git a/internal/translator/init.go b/internal/translator/init.go index d19d9b34..0754db03 100644 --- a/internal/translator/init.go +++ b/internal/translator/init.go @@ -35,5 +35,5 @@ import ( _ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/openai/responses" _ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/claude" - _ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/openai/chat-completions" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/openai" ) diff --git a/internal/translator/kiro/claude/init.go b/internal/translator/kiro/claude/init.go index 9e3a2ba3..1685d195 100644 --- a/internal/translator/kiro/claude/init.go +++ b/internal/translator/kiro/claude/init.go @@ -1,3 +1,4 @@ +// Package claude provides translation 
between Kiro and Claude formats. package claude import ( @@ -12,8 +13,8 @@ func init() { Kiro, ConvertClaudeRequestToKiro, interfaces.TranslateResponse{ - Stream: ConvertKiroResponseToClaude, - NonStream: ConvertKiroResponseToClaudeNonStream, + Stream: ConvertKiroStreamToClaude, + NonStream: ConvertKiroNonStreamToClaude, }, ) } diff --git a/internal/translator/kiro/claude/kiro_claude.go b/internal/translator/kiro/claude/kiro_claude.go index 554dbf21..752a00d9 100644 --- a/internal/translator/kiro/claude/kiro_claude.go +++ b/internal/translator/kiro/claude/kiro_claude.go @@ -1,27 +1,21 @@ // Package claude provides translation between Kiro and Claude formats. // Since Kiro executor generates Claude-compatible SSE format internally (with event: prefix), -// translations are pass-through. +// translations are pass-through for streaming, but responses need proper formatting. package claude import ( - "bytes" "context" ) -// ConvertClaudeRequestToKiro converts Claude request to Kiro format. -// Since Kiro uses Claude format internally, this is mostly a pass-through. -func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bool) []byte { - return bytes.Clone(inputRawJSON) -} - -// ConvertKiroResponseToClaude converts Kiro streaming response to Claude format. +// ConvertKiroStreamToClaude converts Kiro streaming response to Claude format. // Kiro executor already generates complete SSE format with "event:" prefix, // so this is a simple pass-through. -func ConvertKiroResponseToClaude(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string { +func ConvertKiroStreamToClaude(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string { return []string{string(rawResponse)} } -// ConvertKiroResponseToClaudeNonStream converts Kiro non-streaming response to Claude format. 
-func ConvertKiroResponseToClaudeNonStream(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) string { +// ConvertKiroNonStreamToClaude converts Kiro non-streaming response to Claude format. +// The response is already in Claude format, so this is a pass-through. +func ConvertKiroNonStreamToClaude(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) string { return string(rawResponse) } diff --git a/internal/translator/kiro/claude/kiro_claude_request.go b/internal/translator/kiro/claude/kiro_claude_request.go new file mode 100644 index 00000000..07472be4 --- /dev/null +++ b/internal/translator/kiro/claude/kiro_claude_request.go @@ -0,0 +1,603 @@ +// Package claude provides request translation functionality for Claude API to Kiro format. +// It handles parsing and transforming Claude API requests into the Kiro/Amazon Q API format, +// extracting model information, system instructions, message contents, and tool declarations. +package claude + +import ( + "encoding/json" + "fmt" + "strings" + "time" + "unicode/utf8" + + "github.com/google/uuid" + kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common" + log "github.com/sirupsen/logrus" + "github.com/tidwall/gjson" +) + + +// Kiro API request structs - field order determines JSON key order + +// KiroPayload is the top-level request structure for Kiro API +type KiroPayload struct { + ConversationState KiroConversationState `json:"conversationState"` + ProfileArn string `json:"profileArn,omitempty"` + InferenceConfig *KiroInferenceConfig `json:"inferenceConfig,omitempty"` +} + +// KiroInferenceConfig contains inference parameters for the Kiro API. 
type KiroInferenceConfig struct {
	// MaxTokens is left out of the JSON when it is 0 (omitempty).
	MaxTokens int `json:"maxTokens,omitempty"`
	// Temperature is left out of the JSON when it is exactly 0 (omitempty), so an
	// explicit temperature of 0 looks the same on the wire as "not set".
	Temperature float64 `json:"temperature,omitempty"`
}

// KiroConversationState holds the conversation context
type KiroConversationState struct {
	ChatTriggerType string               `json:"chatTriggerType"` // Required: "MANUAL" - must be first field
	ConversationID  string               `json:"conversationId"`
	CurrentMessage  KiroCurrentMessage   `json:"currentMessage"`
	History         []KiroHistoryMessage `json:"history,omitempty"`
}

// KiroCurrentMessage wraps the current user message
type KiroCurrentMessage struct {
	UserInputMessage KiroUserInputMessage `json:"userInputMessage"`
}

// KiroHistoryMessage represents a message in the conversation history.
// Both members are optional pointers tagged omitempty; a nil member is not serialized.
type KiroHistoryMessage struct {
	UserInputMessage         *KiroUserInputMessage         `json:"userInputMessage,omitempty"`
	AssistantResponseMessage *KiroAssistantResponseMessage `json:"assistantResponseMessage,omitempty"`
}

// KiroImage represents an image in Kiro API format
type KiroImage struct {
	Format string          `json:"format"`
	Source KiroImageSource `json:"source"`
}

// KiroImageSource contains the image data
type KiroImageSource struct {
	Bytes string `json:"bytes"` // base64 encoded image data
}

// KiroUserInputMessage represents a user message
type KiroUserInputMessage struct {
	Content                 string                       `json:"content"`
	ModelID                 string                       `json:"modelId"`
	Origin                  string                       `json:"origin"`
	Images                  []KiroImage                  `json:"images,omitempty"`
	UserInputMessageContext *KiroUserInputMessageContext `json:"userInputMessageContext,omitempty"`
}

// KiroUserInputMessageContext contains tool-related context
type KiroUserInputMessageContext struct {
	ToolResults []KiroToolResult  `json:"toolResults,omitempty"`
	Tools       []KiroToolWrapper `json:"tools,omitempty"`
}

// KiroToolResult represents a tool execution result
type KiroToolResult struct {
	Content   []KiroTextContent `json:"content"`
	Status    string            `json:"status"`
	ToolUseID string            `json:"toolUseId"`
}

// KiroTextContent represents text content
type KiroTextContent struct {
	Text string `json:"text"`
}

// KiroToolWrapper wraps a tool specification
type KiroToolWrapper struct {
	ToolSpecification KiroToolSpecification `json:"toolSpecification"`
}

// KiroToolSpecification defines a tool's schema
type KiroToolSpecification struct {
	Name        string          `json:"name"`
	Description string          `json:"description"`
	InputSchema KiroInputSchema `json:"inputSchema"`
}

// KiroInputSchema wraps the JSON schema for tool input
type KiroInputSchema struct {
	JSON interface{} `json:"json"`
}

// KiroAssistantResponseMessage represents an assistant message
type KiroAssistantResponseMessage struct {
	Content  string        `json:"content"`
	ToolUses []KiroToolUse `json:"toolUses,omitempty"`
}

// KiroToolUse represents a tool invocation by the assistant
type KiroToolUse struct {
	ToolUseID string                 `json:"toolUseId"`
	Name      string                 `json:"name"`
	Input     map[string]interface{} `json:"input"` // no omitempty: a nil map is serialized as null
}

// ConvertClaudeRequestToKiro converts a Claude API request to Kiro format.
// This is the main entry point for request translation.
//
// It is an identity pass-through: inputRawJSON is returned unchanged, and
// modelName and stream are not used.
func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bool) []byte {
	// For Kiro, we pass through the Claude format since buildKiroPayload
	// expects Claude format and does the conversion internally.
	// The actual conversion happens in the executor when building the HTTP request.
	return inputRawJSON
}

// BuildKiroPayload constructs the Kiro API request payload from Claude format.
// Supports tool calling - tools are passed via userInputMessageContext.
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
// isAgentic parameter enables chunked write optimization prompt for -agentic model variants.
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint.
//
// Returns the marshalled payload, or nil when JSON marshalling fails.
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
	// Extract max_tokens for potential use in inferenceConfig
	var maxTokens int64
	if mt := gjson.GetBytes(claudeBody, "max_tokens"); mt.Exists() {
		maxTokens = mt.Int()
	}

	// Extract temperature if specified
	var temperature float64
	var hasTemperature bool
	if temp := gjson.GetBytes(claudeBody, "temperature"); temp.Exists() {
		temperature = temp.Float()
		hasTemperature = true
	}

	// Normalize origin value for Kiro API compatibility
	origin = normalizeOrigin(origin)
	log.Debugf("kiro: normalized origin value: %s", origin)

	messages := gjson.GetBytes(claudeBody, "messages")

	// For chat-only mode, don't include tools
	// (tools stays the zero gjson.Result, which convertClaudeToolsToKiro treats as "no tools").
	var tools gjson.Result
	if !isChatOnly {
		tools = gjson.GetBytes(claudeBody, "tools")
	}

	// Extract system prompt
	systemPrompt := extractSystemPrompt(claudeBody)

	// Check for thinking mode
	thinkingEnabled, budgetTokens := checkThinkingMode(claudeBody)

	// Inject timestamp context
	// After this step systemPrompt is never empty, so the `systemPrompt != ""`
	// guards below always take the non-empty branch.
	timestamp := time.Now().Format("2006-01-02 15:04:05 MST")
	timestampContext := fmt.Sprintf("[Context: Current time is %s]", timestamp)
	if systemPrompt != "" {
		systemPrompt = timestampContext + "\n\n" + systemPrompt
	} else {
		systemPrompt = timestampContext
	}
	log.Debugf("kiro: injected timestamp context: %s", timestamp)

	// Inject agentic optimization prompt for -agentic model variants
	if isAgentic {
		if systemPrompt != "" {
			systemPrompt += "\n"
		}
		systemPrompt += kirocommon.KiroAgenticSystemPrompt
	}

	// Inject thinking hint when thinking mode is enabled
	if thinkingEnabled {
		if systemPrompt != "" {
			systemPrompt += "\n"
		}
		// NOTE(review): the hint literal is reproduced exactly as it appears in this
		// view; confirm against the repository that no markup was lost from it.
		dynamicThinkingHint := fmt.Sprintf("interleaved%d", budgetTokens)
		systemPrompt += dynamicThinkingHint
		log.Debugf("kiro: injected dynamic thinking hint into system prompt, max_thinking_length: %d", budgetTokens)
	}

	// Convert Claude tools to Kiro format
	kiroTools := convertClaudeToolsToKiro(tools)

	// Process messages and build history
	history, currentUserMsg, currentToolResults := processMessages(messages, modelID, origin)

	// Build content with system prompt
	if currentUserMsg != nil {
		// buildFinalContent only inspects len(currentToolResults), so running it
		// before deduplication does not change its result.
		currentUserMsg.Content = buildFinalContent(currentUserMsg.Content, systemPrompt, currentToolResults)

		// Deduplicate currentToolResults
		currentToolResults = deduplicateToolResults(currentToolResults)

		// Build userInputMessageContext with tools and tool results
		if len(kiroTools) > 0 || len(currentToolResults) > 0 {
			currentUserMsg.UserInputMessageContext = &KiroUserInputMessageContext{
				Tools:       kiroTools,
				ToolResults: currentToolResults,
			}
		}
	}

	// Build payload
	var currentMessage KiroCurrentMessage
	if currentUserMsg != nil {
		currentMessage = KiroCurrentMessage{UserInputMessage: *currentUserMsg}
	} else {
		// No user or assistant message was produced: send the system prompt alone.
		fallbackContent := ""
		if systemPrompt != "" {
			fallbackContent = "--- SYSTEM PROMPT ---\n" + systemPrompt + "\n--- END SYSTEM PROMPT ---\n"
		}
		currentMessage = KiroCurrentMessage{UserInputMessage: KiroUserInputMessage{
			Content: fallbackContent,
			ModelID: modelID,
			Origin:  origin,
		}}
	}

	// Build inferenceConfig if we have any inference parameters
	var inferenceConfig *KiroInferenceConfig
	if maxTokens > 0 || hasTemperature {
		inferenceConfig = &KiroInferenceConfig{}
		if maxTokens > 0 {
			inferenceConfig.MaxTokens = int(maxTokens)
		}
		if hasTemperature {
			// NOTE(review): KiroInferenceConfig.Temperature is tagged omitempty, so an
			// explicit temperature of 0 is set here but omitted from the JSON.
			inferenceConfig.Temperature = temperature
		}
	}

	payload := KiroPayload{
		ConversationState: KiroConversationState{
			ChatTriggerType: "MANUAL",
			ConversationID:  uuid.New().String(),
			CurrentMessage:  currentMessage,
			History:         history,
		},
		ProfileArn:      profileArn,
		InferenceConfig: inferenceConfig,
	}

	result, err := json.Marshal(payload)
	if err != nil {
		log.Debugf("kiro: failed to marshal payload: %v", err)
		return nil
	}

	return result
}

// normalizeOrigin normalizes origin value for Kiro API compatibility.
// Known aliases map to "CLI" or "AI_EDITOR"; any other value is returned unchanged.
func normalizeOrigin(origin string) string {
	switch origin {
	case "KIRO_CLI":
		return "CLI"
	case "KIRO_AI_EDITOR":
		return "AI_EDITOR"
	case "AMAZON_Q":
		return "CLI"
	case "KIRO_IDE":
		return "AI_EDITOR"
	default:
		return origin
	}
}

// extractSystemPrompt extracts system prompt from Claude request.
// An array-valued "system" field is flattened by concatenating, without a
// separator, the "text" of each {"type":"text"} block and each bare string
// element. Any other shape is returned via gjson's String() conversion.
func extractSystemPrompt(claudeBody []byte) string {
	systemField := gjson.GetBytes(claudeBody, "system")
	if systemField.IsArray() {
		var sb strings.Builder
		for _, block := range systemField.Array() {
			if block.Get("type").String() == "text" {
				sb.WriteString(block.Get("text").String())
			} else if block.Type == gjson.String {
				sb.WriteString(block.String())
			}
		}
		return sb.String()
	}
	return systemField.String()
}

// checkThinkingMode checks if thinking mode is enabled in the Claude request.
// It returns whether thinking is enabled and the token budget. The budget
// defaults to 16000 when "thinking.budget_tokens" is absent; a budget <= 0
// disables thinking even when thinking.type is "enabled".
func checkThinkingMode(claudeBody []byte) (bool, int64) {
	thinkingEnabled := false
	var budgetTokens int64 = 16000

	thinkingField := gjson.GetBytes(claudeBody, "thinking")
	if thinkingField.Exists() {
		thinkingType := thinkingField.Get("type").String()
		if thinkingType == "enabled" {
			thinkingEnabled = true
			if bt := thinkingField.Get("budget_tokens"); bt.Exists() {
				budgetTokens = bt.Int()
				if budgetTokens <= 0 {
					thinkingEnabled = false
					log.Debugf("kiro: thinking mode disabled via budget_tokens <= 0")
				}
			}
			if thinkingEnabled {
				log.Debugf("kiro: thinking mode enabled via Claude API parameter, budget_tokens: %d", budgetTokens)
			}
		}
	}

	return thinkingEnabled, budgetTokens
}

// convertClaudeToolsToKiro converts Claude tools to Kiro format.
// Returns nil when tools is not a JSON array.
func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
	var kiroTools []KiroToolWrapper
	if !tools.IsArray() {
		return kiroTools
	}

	for _, tool := range tools.Array() {
		name := tool.Get("name").String()
		description := tool.Get("description").String()
		inputSchema := tool.Get("input_schema").Value()

		// CRITICAL FIX: Kiro API requires non-empty description
		if strings.TrimSpace(description) == "" {
			description = fmt.Sprintf("Tool: %s", name)
			log.Debugf("kiro: tool '%s' has empty description, using default: %s", name, description)
		}

		// Truncate long descriptions
		if len(description) > kirocommon.KiroMaxToolDescLen {
			// Reserve 30 bytes for the suffix, then back up to a rune boundary
			// so a multi-byte UTF-8 sequence is never cut in half.
			truncLen := kirocommon.KiroMaxToolDescLen - 30
			for truncLen > 0 && !utf8.RuneStart(description[truncLen]) {
				truncLen--
			}
			description = description[:truncLen] + "... (description truncated)"
		}

		kiroTools = append(kiroTools, KiroToolWrapper{
			ToolSpecification: KiroToolSpecification{
				Name:        name,
				Description: description,
				InputSchema: KiroInputSchema{JSON: inputSchema},
			},
		})
	}

	return kiroTools
}

// processMessages processes Claude messages and builds Kiro history.
// It returns the history, the message to send as currentMessage (nil when
// there is no user or assistant message), and the tool results belonging to
// that current message.
func processMessages(messages gjson.Result, modelID, origin string) ([]KiroHistoryMessage, *KiroUserInputMessage, []KiroToolResult) {
	var history []KiroHistoryMessage
	var currentUserMsg *KiroUserInputMessage
	var currentToolResults []KiroToolResult

	// Merge adjacent messages with the same role
	messagesArray := kirocommon.MergeAdjacentMessages(messages.Array())
	for i, msg := range messagesArray {
		role := msg.Get("role").String()
		isLastMessage := i == len(messagesArray)-1

		if role == "user" {
			userMsg, toolResults := BuildUserMessageStruct(msg, modelID, origin)
			if isLastMessage {
				currentUserMsg = &userMsg
				currentToolResults = toolResults
			} else {
				// CRITICAL: Kiro API requires content to be non-empty for history messages too
				if strings.TrimSpace(userMsg.Content) == "" {
					if len(toolResults) > 0 {
						userMsg.Content = "Tool results provided."
					} else {
						userMsg.Content = "Continue"
					}
				}
				// For history messages, embed tool results in context
				if len(toolResults) > 0 {
					userMsg.UserInputMessageContext = &KiroUserInputMessageContext{
						ToolResults: toolResults,
					}
				}
				history = append(history, KiroHistoryMessage{
					UserInputMessage: &userMsg,
				})
			}
		} else if role == "assistant" {
			assistantMsg := BuildAssistantMessageStruct(msg)
			if isLastMessage {
				history = append(history, KiroHistoryMessage{
					AssistantResponseMessage: &assistantMsg,
				})
				// Create a "Continue" user message as currentMessage
				currentUserMsg = &KiroUserInputMessage{
					Content: "Continue",
					ModelID: modelID,
					Origin:  origin,
				}
			} else {
				history = append(history, KiroHistoryMessage{
					AssistantResponseMessage: &assistantMsg,
				})
			}
		}
	}

	return history, currentUserMsg, currentToolResults
}

// buildFinalContent builds the final content with system prompt.
// A non-empty systemPrompt is wrapped in SYSTEM PROMPT markers and placed
// before content. toolResults is only used to choose the placeholder text when
// the result would otherwise be blank.
func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResult) string {
	var contentBuilder strings.Builder

	if systemPrompt != "" {
		contentBuilder.WriteString("--- SYSTEM PROMPT ---\n")
		contentBuilder.WriteString(systemPrompt)
		contentBuilder.WriteString("\n--- END SYSTEM PROMPT ---\n\n")
	}

	contentBuilder.WriteString(content)
	finalContent := contentBuilder.String()

	// CRITICAL: Kiro API requires content to be non-empty
	if strings.TrimSpace(finalContent) == "" {
		if len(toolResults) > 0 {
			finalContent = "Tool results provided."
		} else {
			finalContent = "Continue"
		}
		log.Debugf("kiro: content was empty, using default: %s", finalContent)
	}

	return finalContent
}

// deduplicateToolResults removes duplicate tool results.
// The first result for each ToolUseID is kept and input order is preserved.
func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
	if len(toolResults) == 0 {
		return toolResults
	}

	seenIDs := make(map[string]bool)
	unique := make([]KiroToolResult, 0, len(toolResults))
	for _, tr := range toolResults {
		if !seenIDs[tr.ToolUseID] {
			seenIDs[tr.ToolUseID] = true
			unique = append(unique, tr)
		} else {
			log.Debugf("kiro: skipping duplicate toolResult in currentMessage: %s", tr.ToolUseID)
		}
	}
	return unique
}

// BuildUserMessageStruct builds a user message and extracts tool results.
// Text parts are concatenated without a separator, image parts become
// KiroImage entries, and tool_result parts are returned separately
// (deduplicated by tool_use_id within this message).
func BuildUserMessageStruct(msg gjson.Result, modelID, origin string) (KiroUserInputMessage, []KiroToolResult) {
	content := msg.Get("content")
	var contentBuilder strings.Builder
	var toolResults []KiroToolResult
	var images []KiroImage

	// Track seen toolUseIds to deduplicate
	seenToolUseIDs := make(map[string]bool)

	if content.IsArray() {
		for _, part := range content.Array() {
			partType := part.Get("type").String()
			switch partType {
			case "text":
				contentBuilder.WriteString(part.Get("text").String())
			case "image":
				mediaType := part.Get("source.media_type").String()
				data := part.Get("source.data").String()

				// The format is whatever follows the last "/" of the media type
				// (e.g. "image/png" -> "png"); images without it or without data are dropped.
				format := ""
				if idx := strings.LastIndex(mediaType, "/"); idx != -1 {
					format = mediaType[idx+1:]
				}

				if format != "" && data != "" {
					images = append(images, KiroImage{
						Format: format,
						Source: KiroImageSource{
							Bytes: data,
						},
					})
				}
			case "tool_result":
				toolUseID := part.Get("tool_use_id").String()

				// Skip duplicate toolUseIds
				if seenToolUseIDs[toolUseID] {
					log.Debugf("kiro: skipping duplicate tool_result with toolUseId: %s", toolUseID)
					continue
				}
				seenToolUseIDs[toolUseID] = true

				isError := part.Get("is_error").Bool()
				resultContent := part.Get("content")

				var textContents []KiroTextContent
				if resultContent.IsArray() {
					for _, item := range resultContent.Array() {
						if item.Get("type").String() == "text" {
							textContents = append(textContents, KiroTextContent{Text: item.Get("text").String()})
						} else if item.Type == gjson.String {
							textContents = append(textContents, KiroTextContent{Text: item.String()})
						}
					}
				} else if resultContent.Type == gjson.String {
					textContents = append(textContents, KiroTextContent{Text: resultContent.String()})
				}

				// A result with no usable text gets a placeholder entry.
				if len(textContents) == 0 {
					textContents = append(textContents, KiroTextContent{Text: "Tool use was cancelled by the user"})
				}

				status := "success"
				if isError {
					status = "error"
				}

				toolResults = append(toolResults, KiroToolResult{
					ToolUseID: toolUseID,
					Content:   textContents,
					Status:    status,
				})
			}
		}
	} else {
		contentBuilder.WriteString(content.String())
	}

	userMsg := KiroUserInputMessage{
		Content: contentBuilder.String(),
		ModelID: modelID,
		Origin:  origin,
	}

	if len(images) > 0 {
		userMsg.Images = images
	}

	return userMsg, toolResults
}

// BuildAssistantMessageStruct builds an assistant message with tool uses.
// Text parts are concatenated without a separator; each tool_use part becomes
// a KiroToolUse whose Input stays nil unless the part's "input" is a JSON object.
func BuildAssistantMessageStruct(msg gjson.Result) KiroAssistantResponseMessage {
	content := msg.Get("content")
	var contentBuilder strings.Builder
	var toolUses []KiroToolUse

	if content.IsArray() {
		for _, part := range content.Array() {
			partType := part.Get("type").String()
			switch partType {
			case "text":
				contentBuilder.WriteString(part.Get("text").String())
			case "tool_use":
				toolUseID := part.Get("id").String()
				toolName := part.Get("name").String()
				toolInput := part.Get("input")

				var inputMap map[string]interface{}
				if toolInput.IsObject() {
					inputMap = make(map[string]interface{})
					toolInput.ForEach(func(key, value gjson.Result) bool {
						inputMap[key.String()] = value.Value()
						return true
					})
				}

				toolUses = append(toolUses, KiroToolUse{
					ToolUseID: toolUseID,
					Name:      toolName,
					Input:     inputMap,
				})
			}
		}
	} else {
		contentBuilder.WriteString(content.String())
	}

	return KiroAssistantResponseMessage{
		Content:  contentBuilder.String(),
		ToolUses: toolUses,
	}
}
\ No newline at end of file
diff --git a/internal/translator/kiro/claude/kiro_claude_response.go b/internal/translator/kiro/claude/kiro_claude_response.go
new file mode 100644
index 00000000..49ebf79e
--- /dev/null
+++ b/internal/translator/kiro/claude/kiro_claude_response.go
@@ -0,0 +1,184 @@
// Package claude provides response translation functionality for Kiro API to Claude format.
// This package handles the conversion of Kiro API responses into Claude-compatible format,
// including support for thinking blocks and tool use.
package claude

import (
	"encoding/json"
	"strings"

	"github.com/google/uuid"
	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
	log "github.com/sirupsen/logrus"

	kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
)

// Local references to kirocommon constants for thinking block parsing
var (
	thinkingStartTag = kirocommon.ThinkingStartTag
	thinkingEndTag   = kirocommon.ThinkingEndTag
)

// BuildClaudeResponse constructs a Claude-compatible response.
// Supports tool_use blocks when tools are present in the response.
// Supports thinking blocks - parses tags and converts to Claude thinking content blocks.
// stopReason is passed from upstream; fallback logic applied if empty.
func BuildClaudeResponse(content string, toolUses []KiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte {
	var contentBlocks []map[string]interface{}

	// Extract thinking blocks and text from content
	if content != "" {
		blocks := ExtractThinkingFromContent(content)
		contentBlocks = append(contentBlocks, blocks...)

		// Log if thinking blocks were extracted
		for _, block := range blocks {
			if block["type"] == "thinking" {
				thinkingContent := block["thinking"].(string)
				log.Infof("kiro: buildClaudeResponse extracted thinking block (len: %d)", len(thinkingContent))
			}
		}
	}

	// Add tool_use blocks
	for _, toolUse := range toolUses {
		contentBlocks = append(contentBlocks, map[string]interface{}{
			"type":  "tool_use",
			"id":    toolUse.ToolUseID,
			"name":  toolUse.Name,
			"input": toolUse.Input,
		})
	}

	// Ensure at least one content block (Claude API requires non-empty content)
	if len(contentBlocks) == 0 {
		contentBlocks = append(contentBlocks, map[string]interface{}{
			"type": "text",
			"text": "",
		})
	}

	// Use upstream stopReason; apply fallback logic if not provided
	if stopReason == "" {
		stopReason = "end_turn"
		if len(toolUses) > 0 {
			stopReason = "tool_use"
		}
		log.Debugf("kiro: buildClaudeResponse using fallback stop_reason: %s", stopReason)
	}

	// Log warning if response was truncated due to max_tokens
	if stopReason == "max_tokens" {
		log.Warnf("kiro: response truncated due to max_tokens limit (buildClaudeResponse)")
	}

	response := map[string]interface{}{
		// Message id: "msg_" plus the first 24 characters of a fresh UUID string.
		"id":          "msg_" + uuid.New().String()[:24],
		"type":        "message",
		"role":        "assistant",
		"model":       model,
		"content":     contentBlocks,
		"stop_reason": stopReason,
		"usage": map[string]interface{}{
			"input_tokens":  usageInfo.InputTokens,
			"output_tokens": usageInfo.OutputTokens,
		},
	}
	// The marshal error is discarded; on failure result is nil.
	result, _ := json.Marshal(response)
	return result
}

// ExtractThinkingFromContent parses content to extract thinking blocks and text.
// Returns a list of content blocks in the order they appear in the content.
// Handles interleaved thinking and text blocks correctly.
+func ExtractThinkingFromContent(content string) []map[string]interface{} { + var blocks []map[string]interface{} + + if content == "" { + return blocks + } + + // Check if content contains thinking tags at all + if !strings.Contains(content, thinkingStartTag) { + // No thinking tags, return as plain text + return []map[string]interface{}{ + { + "type": "text", + "text": content, + }, + } + } + + log.Debugf("kiro: extractThinkingFromContent - found thinking tags in content (len: %d)", len(content)) + + remaining := content + + for len(remaining) > 0 { + // Look for tag + startIdx := strings.Index(remaining, thinkingStartTag) + + if startIdx == -1 { + // No more thinking tags, add remaining as text + if strings.TrimSpace(remaining) != "" { + blocks = append(blocks, map[string]interface{}{ + "type": "text", + "text": remaining, + }) + } + break + } + + // Add text before thinking tag (if any meaningful content) + if startIdx > 0 { + textBefore := remaining[:startIdx] + if strings.TrimSpace(textBefore) != "" { + blocks = append(blocks, map[string]interface{}{ + "type": "text", + "text": textBefore, + }) + } + } + + // Move past the opening tag + remaining = remaining[startIdx+len(thinkingStartTag):] + + // Find closing tag + endIdx := strings.Index(remaining, thinkingEndTag) + + if endIdx == -1 { + // No closing tag found, treat rest as thinking content (incomplete response) + if strings.TrimSpace(remaining) != "" { + blocks = append(blocks, map[string]interface{}{ + "type": "thinking", + "thinking": remaining, + }) + log.Warnf("kiro: extractThinkingFromContent - missing closing tag") + } + break + } + + // Extract thinking content between tags + thinkContent := remaining[:endIdx] + if strings.TrimSpace(thinkContent) != "" { + blocks = append(blocks, map[string]interface{}{ + "type": "thinking", + "thinking": thinkContent, + }) + log.Debugf("kiro: extractThinkingFromContent - extracted thinking block (len: %d)", len(thinkContent)) + } + + // Move past the closing tag 
+ remaining = remaining[endIdx+len(thinkingEndTag):] + } + + // If no blocks were created (all whitespace), return empty text block + if len(blocks) == 0 { + blocks = append(blocks, map[string]interface{}{ + "type": "text", + "text": "", + }) + } + + return blocks +} \ No newline at end of file diff --git a/internal/translator/kiro/claude/kiro_claude_stream.go b/internal/translator/kiro/claude/kiro_claude_stream.go new file mode 100644 index 00000000..6ea6e4cd --- /dev/null +++ b/internal/translator/kiro/claude/kiro_claude_stream.go @@ -0,0 +1,176 @@ +// Package claude provides streaming SSE event building for Claude format. +// This package handles the construction of Claude-compatible Server-Sent Events (SSE) +// for streaming responses from Kiro API. +package claude + +import ( + "encoding/json" + + "github.com/google/uuid" + "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage" +) + +// BuildClaudeMessageStartEvent creates the message_start SSE event +func BuildClaudeMessageStartEvent(model string, inputTokens int64) []byte { + event := map[string]interface{}{ + "type": "message_start", + "message": map[string]interface{}{ + "id": "msg_" + uuid.New().String()[:24], + "type": "message", + "role": "assistant", + "content": []interface{}{}, + "model": model, + "stop_reason": nil, + "stop_sequence": nil, + "usage": map[string]interface{}{"input_tokens": inputTokens, "output_tokens": 0}, + }, + } + result, _ := json.Marshal(event) + return []byte("event: message_start\ndata: " + string(result)) +} + +// BuildClaudeContentBlockStartEvent creates a content_block_start SSE event +func BuildClaudeContentBlockStartEvent(index int, blockType, toolUseID, toolName string) []byte { + var contentBlock map[string]interface{} + switch blockType { + case "tool_use": + contentBlock = map[string]interface{}{ + "type": "tool_use", + "id": toolUseID, + "name": toolName, + "input": map[string]interface{}{}, + } + case "thinking": + contentBlock = map[string]interface{}{ + 
"type": "thinking", + "thinking": "", + } + default: + contentBlock = map[string]interface{}{ + "type": "text", + "text": "", + } + } + + event := map[string]interface{}{ + "type": "content_block_start", + "index": index, + "content_block": contentBlock, + } + result, _ := json.Marshal(event) + return []byte("event: content_block_start\ndata: " + string(result)) +} + +// BuildClaudeStreamEvent creates a text_delta content_block_delta SSE event +func BuildClaudeStreamEvent(contentDelta string, index int) []byte { + event := map[string]interface{}{ + "type": "content_block_delta", + "index": index, + "delta": map[string]interface{}{ + "type": "text_delta", + "text": contentDelta, + }, + } + result, _ := json.Marshal(event) + return []byte("event: content_block_delta\ndata: " + string(result)) +} + +// BuildClaudeInputJsonDeltaEvent creates an input_json_delta event for tool use streaming +func BuildClaudeInputJsonDeltaEvent(partialJSON string, index int) []byte { + event := map[string]interface{}{ + "type": "content_block_delta", + "index": index, + "delta": map[string]interface{}{ + "type": "input_json_delta", + "partial_json": partialJSON, + }, + } + result, _ := json.Marshal(event) + return []byte("event: content_block_delta\ndata: " + string(result)) +} + +// BuildClaudeContentBlockStopEvent creates a content_block_stop SSE event +func BuildClaudeContentBlockStopEvent(index int) []byte { + event := map[string]interface{}{ + "type": "content_block_stop", + "index": index, + } + result, _ := json.Marshal(event) + return []byte("event: content_block_stop\ndata: " + string(result)) +} + +// BuildClaudeMessageDeltaEvent creates the message_delta event with stop_reason and usage +func BuildClaudeMessageDeltaEvent(stopReason string, usageInfo usage.Detail) []byte { + deltaEvent := map[string]interface{}{ + "type": "message_delta", + "delta": map[string]interface{}{ + "stop_reason": stopReason, + "stop_sequence": nil, + }, + "usage": map[string]interface{}{ + 
"input_tokens": usageInfo.InputTokens, + "output_tokens": usageInfo.OutputTokens, + }, + } + deltaResult, _ := json.Marshal(deltaEvent) + return []byte("event: message_delta\ndata: " + string(deltaResult)) +} + +// BuildClaudeMessageStopOnlyEvent creates only the message_stop event +func BuildClaudeMessageStopOnlyEvent() []byte { + stopEvent := map[string]interface{}{ + "type": "message_stop", + } + stopResult, _ := json.Marshal(stopEvent) + return []byte("event: message_stop\ndata: " + string(stopResult)) +} + +// BuildClaudePingEventWithUsage creates a ping event with embedded usage information. +// This is used for real-time usage estimation during streaming. +func BuildClaudePingEventWithUsage(inputTokens, outputTokens int64) []byte { + event := map[string]interface{}{ + "type": "ping", + "usage": map[string]interface{}{ + "input_tokens": inputTokens, + "output_tokens": outputTokens, + "total_tokens": inputTokens + outputTokens, + "estimated": true, + }, + } + result, _ := json.Marshal(event) + return []byte("event: ping\ndata: " + string(result)) +} + +// BuildClaudeThinkingDeltaEvent creates a thinking_delta event for Claude API compatibility. +// This is used when streaming thinking content wrapped in tags. +func BuildClaudeThinkingDeltaEvent(thinkingDelta string, index int) []byte { + event := map[string]interface{}{ + "type": "content_block_delta", + "index": index, + "delta": map[string]interface{}{ + "type": "thinking_delta", + "thinking": thinkingDelta, + }, + } + result, _ := json.Marshal(event) + return []byte("event: content_block_delta\ndata: " + string(result)) +} + +// PendingTagSuffix detects if the buffer ends with a partial prefix of the given tag. +// Returns the length of the partial match (0 if no match). +// Based on amq2api implementation for handling cross-chunk tag boundaries. 
+func PendingTagSuffix(buffer, tag string) int { + if buffer == "" || tag == "" { + return 0 + } + maxLen := len(buffer) + if maxLen > len(tag)-1 { + maxLen = len(tag) - 1 + } + for length := maxLen; length > 0; length-- { + if len(buffer) >= length && buffer[len(buffer)-length:] == tag[:length] { + return length + } + } + return 0 +} \ No newline at end of file diff --git a/internal/translator/kiro/claude/kiro_claude_tools.go b/internal/translator/kiro/claude/kiro_claude_tools.go new file mode 100644 index 00000000..93ede875 --- /dev/null +++ b/internal/translator/kiro/claude/kiro_claude_tools.go @@ -0,0 +1,522 @@ +// Package claude provides tool calling support for Kiro to Claude translation. +// This package handles parsing embedded tool calls, JSON repair, and deduplication. +package claude + +import ( + "encoding/json" + "regexp" + "strings" + + "github.com/google/uuid" + kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common" + log "github.com/sirupsen/logrus" +) + +// ToolUseState tracks the state of an in-progress tool use during streaming. +type ToolUseState struct { + ToolUseID string + Name string + InputBuffer strings.Builder + IsComplete bool +} + +// Pre-compiled regex patterns for performance +var ( + // embeddedToolCallPattern matches [Called tool_name with args: {...}] format + embeddedToolCallPattern = regexp.MustCompile(`\[Called\s+([A-Za-z0-9_.-]+)\s+with\s+args:\s*`) + // trailingCommaPattern matches trailing commas before closing braces/brackets + trailingCommaPattern = regexp.MustCompile(`,\s*([}\]])`) +) + +// ParseEmbeddedToolCalls extracts [Called tool_name with args: {...}] format from text. +// Kiro sometimes embeds tool calls in text content instead of using toolUseEvent. +// Returns the cleaned text (with tool calls removed) and extracted tool uses. 
+func ParseEmbeddedToolCalls(text string, processedIDs map[string]bool) (string, []KiroToolUse) { + if !strings.Contains(text, "[Called") { + return text, nil + } + + var toolUses []KiroToolUse + cleanText := text + + // Find all [Called markers + matches := embeddedToolCallPattern.FindAllStringSubmatchIndex(text, -1) + if len(matches) == 0 { + return text, nil + } + + // Process matches in reverse order to maintain correct indices + for i := len(matches) - 1; i >= 0; i-- { + matchStart := matches[i][0] + toolNameStart := matches[i][2] + toolNameEnd := matches[i][3] + + if toolNameStart < 0 || toolNameEnd < 0 { + continue + } + + toolName := text[toolNameStart:toolNameEnd] + + // Find the JSON object start (after "with args:") + jsonStart := matches[i][1] + if jsonStart >= len(text) { + continue + } + + // Skip whitespace to find the opening brace + for jsonStart < len(text) && (text[jsonStart] == ' ' || text[jsonStart] == '\t') { + jsonStart++ + } + + if jsonStart >= len(text) || text[jsonStart] != '{' { + continue + } + + // Find matching closing bracket + jsonEnd := findMatchingBracket(text, jsonStart) + if jsonEnd < 0 { + continue + } + + // Extract JSON and find the closing bracket of [Called ...] 
+ jsonStr := text[jsonStart : jsonEnd+1] + + // Find the closing ] after the JSON + closingBracket := jsonEnd + 1 + for closingBracket < len(text) && text[closingBracket] != ']' { + closingBracket++ + } + if closingBracket >= len(text) { + continue + } + + // End index of the full tool call (closing ']' inclusive) + matchEnd := closingBracket + 1 + + // Repair and parse JSON + repairedJSON := RepairJSON(jsonStr) + var inputMap map[string]interface{} + if err := json.Unmarshal([]byte(repairedJSON), &inputMap); err != nil { + log.Debugf("kiro: failed to parse embedded tool call JSON: %v, raw: %s", err, jsonStr) + continue + } + + // Generate unique tool ID + toolUseID := "toolu_" + uuid.New().String()[:12] + + // Check for duplicates using name+input as key + dedupeKey := toolName + ":" + repairedJSON + if processedIDs != nil { + if processedIDs[dedupeKey] { + log.Debugf("kiro: skipping duplicate embedded tool call: %s", toolName) + // Still remove from text even if duplicate + if matchStart >= 0 && matchEnd <= len(cleanText) && matchStart <= matchEnd { + cleanText = cleanText[:matchStart] + cleanText[matchEnd:] + } + continue + } + processedIDs[dedupeKey] = true + } + + toolUses = append(toolUses, KiroToolUse{ + ToolUseID: toolUseID, + Name: toolName, + Input: inputMap, + }) + + log.Infof("kiro: extracted embedded tool call: %s (ID: %s)", toolName, toolUseID) + + // Remove from clean text (index-based removal to avoid deleting the wrong occurrence) + if matchStart >= 0 && matchEnd <= len(cleanText) && matchStart <= matchEnd { + cleanText = cleanText[:matchStart] + cleanText[matchEnd:] + } + } + + return cleanText, toolUses +} + +// findMatchingBracket finds the index of the closing brace/bracket that matches +// the opening one at startPos. Handles nested objects and strings correctly. 
+func findMatchingBracket(text string, startPos int) int { + if startPos >= len(text) { + return -1 + } + + openChar := text[startPos] + var closeChar byte + switch openChar { + case '{': + closeChar = '}' + case '[': + closeChar = ']' + default: + return -1 + } + + depth := 1 + inString := false + escapeNext := false + + for i := startPos + 1; i < len(text); i++ { + char := text[i] + + if escapeNext { + escapeNext = false + continue + } + + if char == '\\' && inString { + escapeNext = true + continue + } + + if char == '"' { + inString = !inString + continue + } + + if !inString { + if char == openChar { + depth++ + } else if char == closeChar { + depth-- + if depth == 0 { + return i + } + } + } + } + + return -1 +} + +// RepairJSON attempts to fix common JSON issues that may occur in tool call arguments. +// Conservative repair strategy: +// 1. First try to parse JSON directly - if valid, return as-is +// 2. Only attempt repair if parsing fails +// 3. After repair, validate the result - if still invalid, return original +func RepairJSON(jsonString string) string { + // Handle empty or invalid input + if jsonString == "" { + return "{}" + } + + str := strings.TrimSpace(jsonString) + if str == "" { + return "{}" + } + + // CONSERVATIVE STRATEGY: First try to parse directly + var testParse interface{} + if err := json.Unmarshal([]byte(str), &testParse); err == nil { + log.Debugf("kiro: repairJSON - JSON is already valid, returning unchanged") + return str + } + + log.Debugf("kiro: repairJSON - JSON parse failed, attempting repair") + originalStr := str + + // First, escape unescaped newlines/tabs within JSON string values + str = escapeNewlinesInStrings(str) + // Remove trailing commas before closing braces/brackets + str = trailingCommaPattern.ReplaceAllString(str, "$1") + + // Calculate bracket balance + braceCount := 0 + bracketCount := 0 + inString := false + escape := false + lastValidIndex := -1 + + for i := 0; i < len(str); i++ { + char := str[i] + + if 
escape { + escape = false + continue + } + + if char == '\\' { + escape = true + continue + } + + if char == '"' { + inString = !inString + continue + } + + if inString { + continue + } + + switch char { + case '{': + braceCount++ + case '}': + braceCount-- + case '[': + bracketCount++ + case ']': + bracketCount-- + } + + if braceCount >= 0 && bracketCount >= 0 { + lastValidIndex = i + } + } + + // If brackets are unbalanced, try to repair + if braceCount > 0 || bracketCount > 0 { + if lastValidIndex > 0 && lastValidIndex < len(str)-1 { + truncated := str[:lastValidIndex+1] + // Recount brackets after truncation + braceCount = 0 + bracketCount = 0 + inString = false + escape = false + for i := 0; i < len(truncated); i++ { + char := truncated[i] + if escape { + escape = false + continue + } + if char == '\\' { + escape = true + continue + } + if char == '"' { + inString = !inString + continue + } + if inString { + continue + } + switch char { + case '{': + braceCount++ + case '}': + braceCount-- + case '[': + bracketCount++ + case ']': + bracketCount-- + } + } + str = truncated + } + + // Add missing closing brackets + for braceCount > 0 { + str += "}" + braceCount-- + } + for bracketCount > 0 { + str += "]" + bracketCount-- + } + } + + // Validate repaired JSON + if err := json.Unmarshal([]byte(str), &testParse); err != nil { + log.Warnf("kiro: repairJSON - repair failed to produce valid JSON, returning original") + return originalStr + } + + log.Debugf("kiro: repairJSON - successfully repaired JSON") + return str +} + +// escapeNewlinesInStrings escapes literal newlines, tabs, and other control characters +// that appear inside JSON string values. 
+func escapeNewlinesInStrings(raw string) string { + var result strings.Builder + result.Grow(len(raw) + 100) + + inString := false + escaped := false + + for i := 0; i < len(raw); i++ { + c := raw[i] + + if escaped { + result.WriteByte(c) + escaped = false + continue + } + + if c == '\\' && inString { + result.WriteByte(c) + escaped = true + continue + } + + if c == '"' { + inString = !inString + result.WriteByte(c) + continue + } + + if inString { + switch c { + case '\n': + result.WriteString("\\n") + case '\r': + result.WriteString("\\r") + case '\t': + result.WriteString("\\t") + default: + result.WriteByte(c) + } + } else { + result.WriteByte(c) + } + } + + return result.String() +} + +// ProcessToolUseEvent handles a toolUseEvent from the Kiro stream. +// It accumulates input fragments and emits tool_use blocks when complete. +// Returns events to emit and updated state. +func ProcessToolUseEvent(event map[string]interface{}, currentToolUse *ToolUseState, processedIDs map[string]bool) ([]KiroToolUse, *ToolUseState) { + var toolUses []KiroToolUse + + // Extract from nested toolUseEvent or direct format + tu := event + if nested, ok := event["toolUseEvent"].(map[string]interface{}); ok { + tu = nested + } + + toolUseID := kirocommon.GetString(tu, "toolUseId") + toolName := kirocommon.GetString(tu, "name") + isStop := false + if stop, ok := tu["stop"].(bool); ok { + isStop = stop + } + + // Get input - can be string (fragment) or object (complete) + var inputFragment string + var inputMap map[string]interface{} + + if inputRaw, ok := tu["input"]; ok { + switch v := inputRaw.(type) { + case string: + inputFragment = v + case map[string]interface{}: + inputMap = v + } + } + + // New tool use starting + if toolUseID != "" && toolName != "" { + if currentToolUse != nil && currentToolUse.ToolUseID != toolUseID { + log.Warnf("kiro: interleaved tool use detected - new ID %s arrived while %s in progress, completing previous", + toolUseID, currentToolUse.ToolUseID) + if 
!processedIDs[currentToolUse.ToolUseID] { + incomplete := KiroToolUse{ + ToolUseID: currentToolUse.ToolUseID, + Name: currentToolUse.Name, + } + if currentToolUse.InputBuffer.Len() > 0 { + raw := currentToolUse.InputBuffer.String() + repaired := RepairJSON(raw) + + var input map[string]interface{} + if err := json.Unmarshal([]byte(repaired), &input); err != nil { + log.Warnf("kiro: failed to parse interleaved tool input: %v, raw: %s", err, raw) + input = make(map[string]interface{}) + } + incomplete.Input = input + } + toolUses = append(toolUses, incomplete) + processedIDs[currentToolUse.ToolUseID] = true + } + currentToolUse = nil + } + + if currentToolUse == nil { + if processedIDs != nil && processedIDs[toolUseID] { + log.Debugf("kiro: skipping duplicate toolUseEvent: %s", toolUseID) + return nil, nil + } + + currentToolUse = &ToolUseState{ + ToolUseID: toolUseID, + Name: toolName, + } + log.Infof("kiro: starting new tool use: %s (ID: %s)", toolName, toolUseID) + } + } + + // Accumulate input fragments + if currentToolUse != nil && inputFragment != "" { + currentToolUse.InputBuffer.WriteString(inputFragment) + log.Debugf("kiro: accumulated input fragment, total length: %d", currentToolUse.InputBuffer.Len()) + } + + // If complete input object provided directly + if currentToolUse != nil && inputMap != nil { + inputBytes, _ := json.Marshal(inputMap) + currentToolUse.InputBuffer.Reset() + currentToolUse.InputBuffer.Write(inputBytes) + } + + // Tool use complete + if isStop && currentToolUse != nil { + fullInput := currentToolUse.InputBuffer.String() + + // Repair and parse the accumulated JSON + repairedJSON := RepairJSON(fullInput) + var finalInput map[string]interface{} + if err := json.Unmarshal([]byte(repairedJSON), &finalInput); err != nil { + log.Warnf("kiro: failed to parse accumulated tool input: %v, raw: %s", err, fullInput) + finalInput = make(map[string]interface{}) + } + + toolUse := KiroToolUse{ + ToolUseID: currentToolUse.ToolUseID, + Name: 
currentToolUse.Name, + Input: finalInput, + } + toolUses = append(toolUses, toolUse) + + if processedIDs != nil { + processedIDs[currentToolUse.ToolUseID] = true + } + + log.Infof("kiro: completed tool use: %s (ID: %s)", currentToolUse.Name, currentToolUse.ToolUseID) + return toolUses, nil + } + + return toolUses, currentToolUse +} + +// DeduplicateToolUses removes duplicate tool uses based on toolUseId and content. +func DeduplicateToolUses(toolUses []KiroToolUse) []KiroToolUse { + seenIDs := make(map[string]bool) + seenContent := make(map[string]bool) + var unique []KiroToolUse + + for _, tu := range toolUses { + if seenIDs[tu.ToolUseID] { + log.Debugf("kiro: removing ID-duplicate tool use: %s (name: %s)", tu.ToolUseID, tu.Name) + continue + } + + inputJSON, _ := json.Marshal(tu.Input) + contentKey := tu.Name + ":" + string(inputJSON) + + if seenContent[contentKey] { + log.Debugf("kiro: removing content-duplicate tool use: %s (id: %s)", tu.Name, tu.ToolUseID) + continue + } + + seenIDs[tu.ToolUseID] = true + seenContent[contentKey] = true + unique = append(unique, tu) + } + + return unique +} + diff --git a/internal/translator/kiro/common/constants.go b/internal/translator/kiro/common/constants.go new file mode 100644 index 00000000..1d4b0330 --- /dev/null +++ b/internal/translator/kiro/common/constants.go @@ -0,0 +1,66 @@ +// Package common provides shared constants and utilities for Kiro translator. +package common + +const ( + // KiroMaxToolDescLen is the maximum description length for Kiro API tools. + // Kiro API limit is 10240 bytes, leave room for "..." + KiroMaxToolDescLen = 10237 + + // ThinkingStartTag is the start tag for thinking blocks in responses. + ThinkingStartTag = "" + + // ThinkingEndTag is the end tag for thinking blocks in responses. + ThinkingEndTag = "" + + // KiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes. + // AWS Kiro API has a 2-3 minute timeout for large file write operations. 
+ KiroAgenticSystemPrompt = ` +# CRITICAL: CHUNKED WRITE PROTOCOL (MANDATORY) + +You MUST follow these rules for ALL file operations. Violation causes server timeouts and task failure. + +## ABSOLUTE LIMITS +- **MAXIMUM 350 LINES** per single write/edit operation - NO EXCEPTIONS +- **RECOMMENDED 300 LINES** or less for optimal performance +- **NEVER** write entire files in one operation if >300 lines + +## MANDATORY CHUNKED WRITE STRATEGY + +### For NEW FILES (>300 lines total): +1. FIRST: Write initial chunk (first 250-300 lines) using write_to_file/fsWrite +2. THEN: Append remaining content in 250-300 line chunks using file append operations +3. REPEAT: Continue appending until complete + +### For EDITING EXISTING FILES: +1. Use surgical edits (apply_diff/targeted edits) - change ONLY what's needed +2. NEVER rewrite entire files - use incremental modifications +3. Split large refactors into multiple small, focused edits + +### For LARGE CODE GENERATION: +1. Generate in logical sections (imports, types, functions separately) +2. Write each section as a separate operation +3. Use append operations for subsequent sections + +## EXAMPLES OF CORRECT BEHAVIOR + +✅ CORRECT: Writing a 600-line file +- Operation 1: Write lines 1-300 (initial file creation) +- Operation 2: Append lines 301-600 + +✅ CORRECT: Editing multiple functions +- Operation 1: Edit function A +- Operation 2: Edit function B +- Operation 3: Edit function C + +❌ WRONG: Writing 500 lines in single operation → TIMEOUT +❌ WRONG: Rewriting entire file to change 5 lines → TIMEOUT +❌ WRONG: Generating massive code blocks without chunking → TIMEOUT + +## WHY THIS MATTERS +- Server has 2-3 minute timeout for operations +- Large writes exceed timeout and FAIL completely +- Chunked writes are FASTER and more RELIABLE +- Failed writes waste time and require retry + +REMEMBER: When in doubt, write LESS per operation. 
Multiple small operations > one large operation.` +) \ No newline at end of file diff --git a/internal/translator/kiro/common/message_merge.go b/internal/translator/kiro/common/message_merge.go new file mode 100644 index 00000000..93f17f28 --- /dev/null +++ b/internal/translator/kiro/common/message_merge.go @@ -0,0 +1,125 @@ +// Package common provides shared utilities for Kiro translators. +package common + +import ( + "encoding/json" + + "github.com/tidwall/gjson" +) + +// MergeAdjacentMessages merges adjacent messages with the same role. +// This reduces API call complexity and improves compatibility. +// Based on AIClient-2-API implementation. +func MergeAdjacentMessages(messages []gjson.Result) []gjson.Result { + if len(messages) <= 1 { + return messages + } + + var merged []gjson.Result + for _, msg := range messages { + if len(merged) == 0 { + merged = append(merged, msg) + continue + } + + lastMsg := merged[len(merged)-1] + currentRole := msg.Get("role").String() + lastRole := lastMsg.Get("role").String() + + if currentRole == lastRole { + // Merge content from current message into last message + mergedContent := mergeMessageContent(lastMsg, msg) + // Create a new merged message JSON + mergedMsg := createMergedMessage(lastRole, mergedContent) + merged[len(merged)-1] = gjson.Parse(mergedMsg) + } else { + merged = append(merged, msg) + } + } + + return merged +} + +// mergeMessageContent merges the content of two messages with the same role. +// Handles both string content and array content (with text, tool_use, tool_result blocks). 
+func mergeMessageContent(msg1, msg2 gjson.Result) string { + content1 := msg1.Get("content") + content2 := msg2.Get("content") + + // Extract content blocks from both messages + var blocks1, blocks2 []map[string]interface{} + + if content1.IsArray() { + for _, block := range content1.Array() { + blocks1 = append(blocks1, blockToMap(block)) + } + } else if content1.Type == gjson.String { + blocks1 = append(blocks1, map[string]interface{}{ + "type": "text", + "text": content1.String(), + }) + } + + if content2.IsArray() { + for _, block := range content2.Array() { + blocks2 = append(blocks2, blockToMap(block)) + } + } else if content2.Type == gjson.String { + blocks2 = append(blocks2, map[string]interface{}{ + "type": "text", + "text": content2.String(), + }) + } + + // Merge text blocks if both end/start with text + if len(blocks1) > 0 && len(blocks2) > 0 { + if blocks1[len(blocks1)-1]["type"] == "text" && blocks2[0]["type"] == "text" { + // Merge the last text block of msg1 with the first text block of msg2 + text1 := blocks1[len(blocks1)-1]["text"].(string) + text2 := blocks2[0]["text"].(string) + blocks1[len(blocks1)-1]["text"] = text1 + "\n" + text2 + blocks2 = blocks2[1:] // Remove the merged block from blocks2 + } + } + + // Combine all blocks + allBlocks := append(blocks1, blocks2...) 
+ + // Convert to JSON + result, _ := json.Marshal(allBlocks) + return string(result) +} + +// blockToMap converts a gjson.Result block to a map[string]interface{} +func blockToMap(block gjson.Result) map[string]interface{} { + result := make(map[string]interface{}) + block.ForEach(func(key, value gjson.Result) bool { + if value.IsObject() { + result[key.String()] = blockToMap(value) + } else if value.IsArray() { + var arr []interface{} + for _, item := range value.Array() { + if item.IsObject() { + arr = append(arr, blockToMap(item)) + } else { + arr = append(arr, item.Value()) + } + } + result[key.String()] = arr + } else { + result[key.String()] = value.Value() + } + return true + }) + return result +} + +// createMergedMessage creates a JSON string for a merged message +func createMergedMessage(role string, content string) string { + msg := map[string]interface{}{ + "role": role, + "content": json.RawMessage(content), + } + result, _ := json.Marshal(msg) + return string(result) +} \ No newline at end of file diff --git a/internal/translator/kiro/common/utils.go b/internal/translator/kiro/common/utils.go new file mode 100644 index 00000000..f5f5788a --- /dev/null +++ b/internal/translator/kiro/common/utils.go @@ -0,0 +1,16 @@ +// Package common provides shared constants and utilities for Kiro translator. +package common + +// GetString safely extracts a string from a map. +// Returns empty string if the key doesn't exist or the value is not a string. +func GetString(m map[string]interface{}, key string) string { + if v, ok := m[key].(string); ok { + return v + } + return "" +} + +// GetStringValue is an alias for GetString for backward compatibility. 
+func GetStringValue(m map[string]interface{}, key string) string { + return GetString(m, key) +} \ No newline at end of file diff --git a/internal/translator/kiro/openai/chat-completions/kiro_openai_request.go b/internal/translator/kiro/openai/chat-completions/kiro_openai_request.go deleted file mode 100644 index d1094c1c..00000000 --- a/internal/translator/kiro/openai/chat-completions/kiro_openai_request.go +++ /dev/null @@ -1,348 +0,0 @@ -// Package chat_completions provides request translation from OpenAI to Kiro format. -package chat_completions - -import ( - "bytes" - "encoding/json" - "strings" - - "github.com/tidwall/gjson" - "github.com/tidwall/sjson" -) - -// reasoningEffortToBudget maps OpenAI reasoning_effort values to Claude thinking budget_tokens. -// OpenAI uses "low", "medium", "high" while Claude uses numeric budget_tokens. -var reasoningEffortToBudget = map[string]int{ - "low": 4000, - "medium": 16000, - "high": 32000, -} - -// ConvertOpenAIRequestToKiro transforms an OpenAI Chat Completions API request into Kiro (Claude) format. -// Kiro uses Claude-compatible format internally, so we primarily pass through to Claude format. -// Supports tool calling: OpenAI tools -> Claude tools, tool_calls -> tool_use, tool messages -> tool_result. -// Supports reasoning/thinking: OpenAI reasoning_effort -> Claude thinking parameter. 
-func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bool) []byte { - rawJSON := bytes.Clone(inputRawJSON) - root := gjson.ParseBytes(rawJSON) - - // Build Claude-compatible request - out := `{"model":"","max_tokens":32000,"messages":[]}` - - // Set model - out, _ = sjson.Set(out, "model", modelName) - - // Copy max_tokens if present - if v := root.Get("max_tokens"); v.Exists() { - out, _ = sjson.Set(out, "max_tokens", v.Int()) - } - - // Copy temperature if present - if v := root.Get("temperature"); v.Exists() { - out, _ = sjson.Set(out, "temperature", v.Float()) - } - - // Copy top_p if present - if v := root.Get("top_p"); v.Exists() { - out, _ = sjson.Set(out, "top_p", v.Float()) - } - - // Handle OpenAI reasoning_effort parameter -> Claude thinking parameter - // OpenAI format: {"reasoning_effort": "low"|"medium"|"high"} - // Claude format: {"thinking": {"type": "enabled", "budget_tokens": N}} - if v := root.Get("reasoning_effort"); v.Exists() { - effort := v.String() - if budget, ok := reasoningEffortToBudget[effort]; ok { - thinking := map[string]interface{}{ - "type": "enabled", - "budget_tokens": budget, - } - out, _ = sjson.Set(out, "thinking", thinking) - } - } - - // Also support direct thinking parameter passthrough (for Claude API compatibility) - // Claude format: {"thinking": {"type": "enabled", "budget_tokens": N}} - if v := root.Get("thinking"); v.Exists() && v.IsObject() { - out, _ = sjson.Set(out, "thinking", v.Value()) - } - - // Convert OpenAI tools to Claude tools format - if tools := root.Get("tools"); tools.Exists() && tools.IsArray() { - claudeTools := make([]interface{}, 0) - for _, tool := range tools.Array() { - if tool.Get("type").String() == "function" { - fn := tool.Get("function") - claudeTool := map[string]interface{}{ - "name": fn.Get("name").String(), - "description": fn.Get("description").String(), - } - // Convert parameters to input_schema - if params := fn.Get("parameters"); params.Exists() { - 
claudeTool["input_schema"] = params.Value() - } else { - claudeTool["input_schema"] = map[string]interface{}{ - "type": "object", - "properties": map[string]interface{}{}, - } - } - claudeTools = append(claudeTools, claudeTool) - } - } - if len(claudeTools) > 0 { - out, _ = sjson.Set(out, "tools", claudeTools) - } - } - - // Process messages - messages := root.Get("messages") - if messages.Exists() && messages.IsArray() { - claudeMessages := make([]interface{}, 0) - var systemPrompt string - - // Track pending tool results to merge with next user message - var pendingToolResults []map[string]interface{} - - for _, msg := range messages.Array() { - role := msg.Get("role").String() - content := msg.Get("content") - - if role == "system" { - // Extract system message - if content.IsArray() { - for _, part := range content.Array() { - if part.Get("type").String() == "text" { - systemPrompt += part.Get("text").String() + "\n" - } - } - } else { - systemPrompt = content.String() - } - continue - } - - if role == "tool" { - // OpenAI tool message -> Claude tool_result content block - toolCallID := msg.Get("tool_call_id").String() - toolContent := content.String() - - toolResult := map[string]interface{}{ - "type": "tool_result", - "tool_use_id": toolCallID, - } - - // Handle content - can be string or structured - if content.IsArray() { - contentParts := make([]interface{}, 0) - for _, part := range content.Array() { - if part.Get("type").String() == "text" { - contentParts = append(contentParts, map[string]interface{}{ - "type": "text", - "text": part.Get("text").String(), - }) - } - } - toolResult["content"] = contentParts - } else { - toolResult["content"] = toolContent - } - - pendingToolResults = append(pendingToolResults, toolResult) - continue - } - - claudeMsg := map[string]interface{}{ - "role": role, - } - - // Handle assistant messages with tool_calls - if role == "assistant" && msg.Get("tool_calls").Exists() { - contentParts := make([]interface{}, 0) - - // 
Add text content if present - if content.Exists() && content.String() != "" { - contentParts = append(contentParts, map[string]interface{}{ - "type": "text", - "text": content.String(), - }) - } - - // Convert tool_calls to tool_use blocks - for _, toolCall := range msg.Get("tool_calls").Array() { - toolUseID := toolCall.Get("id").String() - fnName := toolCall.Get("function.name").String() - fnArgs := toolCall.Get("function.arguments").String() - - // Parse arguments JSON - var argsMap map[string]interface{} - if err := json.Unmarshal([]byte(fnArgs), &argsMap); err != nil { - argsMap = map[string]interface{}{"raw": fnArgs} - } - - contentParts = append(contentParts, map[string]interface{}{ - "type": "tool_use", - "id": toolUseID, - "name": fnName, - "input": argsMap, - }) - } - - claudeMsg["content"] = contentParts - claudeMessages = append(claudeMessages, claudeMsg) - continue - } - - // Handle user messages - may need to include pending tool results - if role == "user" && len(pendingToolResults) > 0 { - contentParts := make([]interface{}, 0) - - // Add pending tool results first - for _, tr := range pendingToolResults { - contentParts = append(contentParts, tr) - } - pendingToolResults = nil - - // Add user content - if content.IsArray() { - for _, part := range content.Array() { - partType := part.Get("type").String() - if partType == "text" { - contentParts = append(contentParts, map[string]interface{}{ - "type": "text", - "text": part.Get("text").String(), - }) - } else if partType == "image_url" { - imageURL := part.Get("image_url.url").String() - - // Check if it's base64 format (data:image/png;base64,xxxxx) - if strings.HasPrefix(imageURL, "data:") { - // Parse data URL format - // Format: data:image/png;base64,xxxxx - commaIdx := strings.Index(imageURL, ",") - if commaIdx != -1 { - // Extract media_type (e.g., "image/png") - header := imageURL[5:commaIdx] // Remove "data:" prefix - mediaType := header - if semiIdx := strings.Index(header, ";"); semiIdx != 
-1 { - mediaType = header[:semiIdx] - } - - // Extract base64 data - base64Data := imageURL[commaIdx+1:] - - contentParts = append(contentParts, map[string]interface{}{ - "type": "image", - "source": map[string]interface{}{ - "type": "base64", - "media_type": mediaType, - "data": base64Data, - }, - }) - } - } else { - // Regular URL format - keep original logic - contentParts = append(contentParts, map[string]interface{}{ - "type": "image", - "source": map[string]interface{}{ - "type": "url", - "url": imageURL, - }, - }) - } - } - } - } else if content.String() != "" { - contentParts = append(contentParts, map[string]interface{}{ - "type": "text", - "text": content.String(), - }) - } - - claudeMsg["content"] = contentParts - claudeMessages = append(claudeMessages, claudeMsg) - continue - } - - // Handle regular content - if content.IsArray() { - contentParts := make([]interface{}, 0) - for _, part := range content.Array() { - partType := part.Get("type").String() - if partType == "text" { - contentParts = append(contentParts, map[string]interface{}{ - "type": "text", - "text": part.Get("text").String(), - }) - } else if partType == "image_url" { - imageURL := part.Get("image_url.url").String() - - // Check if it's base64 format (data:image/png;base64,xxxxx) - if strings.HasPrefix(imageURL, "data:") { - // Parse data URL format - // Format: data:image/png;base64,xxxxx - commaIdx := strings.Index(imageURL, ",") - if commaIdx != -1 { - // Extract media_type (e.g., "image/png") - header := imageURL[5:commaIdx] // Remove "data:" prefix - mediaType := header - if semiIdx := strings.Index(header, ";"); semiIdx != -1 { - mediaType = header[:semiIdx] - } - - // Extract base64 data - base64Data := imageURL[commaIdx+1:] - - contentParts = append(contentParts, map[string]interface{}{ - "type": "image", - "source": map[string]interface{}{ - "type": "base64", - "media_type": mediaType, - "data": base64Data, - }, - }) - } - } else { - // Regular URL format - keep original logic - 
contentParts = append(contentParts, map[string]interface{}{ - "type": "image", - "source": map[string]interface{}{ - "type": "url", - "url": imageURL, - }, - }) - } - } - } - claudeMsg["content"] = contentParts - } else { - claudeMsg["content"] = content.String() - } - - claudeMessages = append(claudeMessages, claudeMsg) - } - - // If there are pending tool results without a following user message, - // create a user message with just the tool results - if len(pendingToolResults) > 0 { - contentParts := make([]interface{}, 0) - for _, tr := range pendingToolResults { - contentParts = append(contentParts, tr) - } - claudeMessages = append(claudeMessages, map[string]interface{}{ - "role": "user", - "content": contentParts, - }) - } - - out, _ = sjson.Set(out, "messages", claudeMessages) - - if systemPrompt != "" { - out, _ = sjson.Set(out, "system", systemPrompt) - } - } - - // Set stream - out, _ = sjson.Set(out, "stream", stream) - - return []byte(out) -} diff --git a/internal/translator/kiro/openai/chat-completions/kiro_openai_response.go b/internal/translator/kiro/openai/chat-completions/kiro_openai_response.go deleted file mode 100644 index 2fab2a4d..00000000 --- a/internal/translator/kiro/openai/chat-completions/kiro_openai_response.go +++ /dev/null @@ -1,404 +0,0 @@ -// Package chat_completions provides response translation from Kiro to OpenAI format. -package chat_completions - -import ( - "context" - "encoding/json" - "strings" - "time" - - "github.com/google/uuid" - "github.com/tidwall/gjson" -) - -// ConvertKiroResponseToOpenAI converts Kiro streaming response to OpenAI SSE format. -// Handles Claude SSE events: content_block_start, content_block_delta, input_json_delta, -// content_block_stop, message_delta, and message_stop. -// Input may be in SSE format: "event: xxx\ndata: {...}" or raw JSON. 
-func ConvertKiroResponseToOpenAI(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string { - raw := string(rawResponse) - var results []string - - // Handle SSE format: extract JSON from "data: " lines - // Input format: "event: message_start\ndata: {...}" - lines := strings.Split(raw, "\n") - for _, line := range lines { - line = strings.TrimSpace(line) - if strings.HasPrefix(line, "data: ") { - jsonPart := strings.TrimPrefix(line, "data: ") - chunks := convertClaudeEventToOpenAI(jsonPart, model) - results = append(results, chunks...) - } else if strings.HasPrefix(line, "{") { - // Raw JSON (backward compatibility) - chunks := convertClaudeEventToOpenAI(line, model) - results = append(results, chunks...) - } - } - - return results -} - -// convertClaudeEventToOpenAI converts a single Claude JSON event to OpenAI format -func convertClaudeEventToOpenAI(jsonStr string, model string) []string { - root := gjson.Parse(jsonStr) - var results []string - - eventType := root.Get("type").String() - - switch eventType { - case "message_start": - // Initial message event - emit initial chunk with role - response := map[string]interface{}{ - "id": "chatcmpl-" + uuid.New().String()[:24], - "object": "chat.completion.chunk", - "created": time.Now().Unix(), - "model": model, - "choices": []map[string]interface{}{ - { - "index": 0, - "delta": map[string]interface{}{ - "role": "assistant", - "content": "", - }, - "finish_reason": nil, - }, - }, - } - result, _ := json.Marshal(response) - results = append(results, string(result)) - return results - - case "content_block_start": - // Start of a content block (text or tool_use) - blockType := root.Get("content_block.type").String() - index := int(root.Get("index").Int()) - - if blockType == "tool_use" { - // Start of tool_use block - toolUseID := root.Get("content_block.id").String() - toolName := root.Get("content_block.name").String() - - toolCall := map[string]interface{}{ - "index": 
index, - "id": toolUseID, - "type": "function", - "function": map[string]interface{}{ - "name": toolName, - "arguments": "", - }, - } - - response := map[string]interface{}{ - "id": "chatcmpl-" + uuid.New().String()[:24], - "object": "chat.completion.chunk", - "created": time.Now().Unix(), - "model": model, - "choices": []map[string]interface{}{ - { - "index": 0, - "delta": map[string]interface{}{ - "tool_calls": []map[string]interface{}{toolCall}, - }, - "finish_reason": nil, - }, - }, - } - result, _ := json.Marshal(response) - results = append(results, string(result)) - } - return results - - case "content_block_delta": - index := int(root.Get("index").Int()) - deltaType := root.Get("delta.type").String() - - if deltaType == "text_delta" { - // Text content delta - contentDelta := root.Get("delta.text").String() - if contentDelta != "" { - response := map[string]interface{}{ - "id": "chatcmpl-" + uuid.New().String()[:24], - "object": "chat.completion.chunk", - "created": time.Now().Unix(), - "model": model, - "choices": []map[string]interface{}{ - { - "index": 0, - "delta": map[string]interface{}{ - "content": contentDelta, - }, - "finish_reason": nil, - }, - }, - } - result, _ := json.Marshal(response) - results = append(results, string(result)) - } - } else if deltaType == "thinking_delta" { - // Thinking/reasoning content delta - convert to OpenAI reasoning_content format - thinkingDelta := root.Get("delta.thinking").String() - if thinkingDelta != "" { - response := map[string]interface{}{ - "id": "chatcmpl-" + uuid.New().String()[:24], - "object": "chat.completion.chunk", - "created": time.Now().Unix(), - "model": model, - "choices": []map[string]interface{}{ - { - "index": 0, - "delta": map[string]interface{}{ - "reasoning_content": thinkingDelta, - }, - "finish_reason": nil, - }, - }, - } - result, _ := json.Marshal(response) - results = append(results, string(result)) - } - } else if deltaType == "input_json_delta" { - // Tool input delta (streaming 
arguments) - partialJSON := root.Get("delta.partial_json").String() - if partialJSON != "" { - toolCall := map[string]interface{}{ - "index": index, - "function": map[string]interface{}{ - "arguments": partialJSON, - }, - } - - response := map[string]interface{}{ - "id": "chatcmpl-" + uuid.New().String()[:24], - "object": "chat.completion.chunk", - "created": time.Now().Unix(), - "model": model, - "choices": []map[string]interface{}{ - { - "index": 0, - "delta": map[string]interface{}{ - "tool_calls": []map[string]interface{}{toolCall}, - }, - "finish_reason": nil, - }, - }, - } - result, _ := json.Marshal(response) - results = append(results, string(result)) - } - } - return results - - case "content_block_stop": - // End of content block - no output needed for OpenAI format - return results - - case "message_delta": - // Final message delta with stop_reason and usage - stopReason := root.Get("delta.stop_reason").String() - if stopReason != "" { - finishReason := "stop" - if stopReason == "tool_use" { - finishReason = "tool_calls" - } else if stopReason == "end_turn" { - finishReason = "stop" - } else if stopReason == "max_tokens" { - finishReason = "length" - } - - response := map[string]interface{}{ - "id": "chatcmpl-" + uuid.New().String()[:24], - "object": "chat.completion.chunk", - "created": time.Now().Unix(), - "model": model, - "choices": []map[string]interface{}{ - { - "index": 0, - "delta": map[string]interface{}{}, - "finish_reason": finishReason, - }, - }, - } - - // Extract and include usage information from message_delta event - usage := root.Get("usage") - if usage.Exists() { - inputTokens := usage.Get("input_tokens").Int() - outputTokens := usage.Get("output_tokens").Int() - response["usage"] = map[string]interface{}{ - "prompt_tokens": inputTokens, - "completion_tokens": outputTokens, - "total_tokens": inputTokens + outputTokens, - } - } - - result, _ := json.Marshal(response) - results = append(results, string(result)) - } - return results - - 
case "message_stop": - // End of message - could emit [DONE] marker - return results - } - - // Fallback: handle raw content for backward compatibility - var contentDelta string - if delta := root.Get("delta.text"); delta.Exists() { - contentDelta = delta.String() - } else if content := root.Get("content"); content.Exists() && root.Get("type").String() == "" { - contentDelta = content.String() - } - - if contentDelta != "" { - response := map[string]interface{}{ - "id": "chatcmpl-" + uuid.New().String()[:24], - "object": "chat.completion.chunk", - "created": time.Now().Unix(), - "model": model, - "choices": []map[string]interface{}{ - { - "index": 0, - "delta": map[string]interface{}{ - "content": contentDelta, - }, - "finish_reason": nil, - }, - }, - } - result, _ := json.Marshal(response) - results = append(results, string(result)) - } - - // Handle tool_use content blocks (Claude format) - fallback - toolUses := root.Get("delta.tool_use") - if !toolUses.Exists() { - toolUses = root.Get("tool_use") - } - if toolUses.Exists() && toolUses.IsObject() { - inputJSON := toolUses.Get("input").String() - if inputJSON == "" { - if inputObj := toolUses.Get("input"); inputObj.Exists() { - inputBytes, _ := json.Marshal(inputObj.Value()) - inputJSON = string(inputBytes) - } - } - - toolCall := map[string]interface{}{ - "index": 0, - "id": toolUses.Get("id").String(), - "type": "function", - "function": map[string]interface{}{ - "name": toolUses.Get("name").String(), - "arguments": inputJSON, - }, - } - - response := map[string]interface{}{ - "id": "chatcmpl-" + uuid.New().String()[:24], - "object": "chat.completion.chunk", - "created": time.Now().Unix(), - "model": model, - "choices": []map[string]interface{}{ - { - "index": 0, - "delta": map[string]interface{}{ - "tool_calls": []map[string]interface{}{toolCall}, - }, - "finish_reason": nil, - }, - }, - } - result, _ := json.Marshal(response) - results = append(results, string(result)) - } - - return results -} - -// 
ConvertKiroResponseToOpenAINonStream converts Kiro non-streaming response to OpenAI format. -func ConvertKiroResponseToOpenAINonStream(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) string { - root := gjson.ParseBytes(rawResponse) - - var content string - var reasoningContent string - var toolCalls []map[string]interface{} - - contentArray := root.Get("content") - if contentArray.IsArray() { - for _, item := range contentArray.Array() { - itemType := item.Get("type").String() - if itemType == "text" { - content += item.Get("text").String() - } else if itemType == "thinking" { - // Extract thinking/reasoning content - reasoningContent += item.Get("thinking").String() - } else if itemType == "tool_use" { - // Convert Claude tool_use to OpenAI tool_calls format - inputJSON := item.Get("input").String() - if inputJSON == "" { - // If input is an object, marshal it - if inputObj := item.Get("input"); inputObj.Exists() { - inputBytes, _ := json.Marshal(inputObj.Value()) - inputJSON = string(inputBytes) - } - } - toolCall := map[string]interface{}{ - "id": item.Get("id").String(), - "type": "function", - "function": map[string]interface{}{ - "name": item.Get("name").String(), - "arguments": inputJSON, - }, - } - toolCalls = append(toolCalls, toolCall) - } - } - } else { - content = root.Get("content").String() - } - - inputTokens := root.Get("usage.input_tokens").Int() - outputTokens := root.Get("usage.output_tokens").Int() - - message := map[string]interface{}{ - "role": "assistant", - "content": content, - } - - // Add reasoning_content if present (OpenAI reasoning format) - if reasoningContent != "" { - message["reasoning_content"] = reasoningContent - } - - // Add tool_calls if present - if len(toolCalls) > 0 { - message["tool_calls"] = toolCalls - } - - finishReason := "stop" - if len(toolCalls) > 0 { - finishReason = "tool_calls" - } - - response := map[string]interface{}{ - "id": "chatcmpl-" + uuid.New().String()[:24], 
- "object": "chat.completion", - "created": time.Now().Unix(), - "model": model, - "choices": []map[string]interface{}{ - { - "index": 0, - "message": message, - "finish_reason": finishReason, - }, - }, - "usage": map[string]interface{}{ - "prompt_tokens": inputTokens, - "completion_tokens": outputTokens, - "total_tokens": inputTokens + outputTokens, - }, - } - - result, _ := json.Marshal(response) - return string(result) -} diff --git a/internal/translator/kiro/openai/chat-completions/init.go b/internal/translator/kiro/openai/init.go similarity index 56% rename from internal/translator/kiro/openai/chat-completions/init.go rename to internal/translator/kiro/openai/init.go index 2a99d0e0..653eed45 100644 --- a/internal/translator/kiro/openai/chat-completions/init.go +++ b/internal/translator/kiro/openai/init.go @@ -1,4 +1,5 @@ -package chat_completions +// Package openai provides translation between OpenAI Chat Completions and Kiro formats. +package openai import ( . "github.com/router-for-me/CLIProxyAPI/v6/internal/constant" @@ -8,12 +9,12 @@ import ( func init() { translator.Register( - OpenAI, - Kiro, + OpenAI, // source format + Kiro, // target format ConvertOpenAIRequestToKiro, interfaces.TranslateResponse{ - Stream: ConvertKiroResponseToOpenAI, - NonStream: ConvertKiroResponseToOpenAINonStream, + Stream: ConvertKiroStreamToOpenAI, + NonStream: ConvertKiroNonStreamToOpenAI, }, ) -} +} \ No newline at end of file diff --git a/internal/translator/kiro/openai/kiro_openai.go b/internal/translator/kiro/openai/kiro_openai.go new file mode 100644 index 00000000..35cd0424 --- /dev/null +++ b/internal/translator/kiro/openai/kiro_openai.go @@ -0,0 +1,368 @@ +// Package openai provides translation between OpenAI Chat Completions and Kiro formats. +// This package enables direct OpenAI → Kiro translation, bypassing the Claude intermediate layer. 
+// +// The Kiro executor generates Claude-compatible SSE format internally, so the streaming response +// translation converts from Claude SSE format to OpenAI SSE format. +package openai + +import ( + "bytes" + "context" + "encoding/json" + "strings" + + kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common" + "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage" + log "github.com/sirupsen/logrus" + "github.com/tidwall/gjson" +) + +// ConvertKiroStreamToOpenAI converts Kiro streaming response to OpenAI format. +// The Kiro executor emits Claude-compatible SSE events, so this function translates +// from Claude SSE format to OpenAI SSE format. +// +// Claude SSE format: +// - event: message_start\ndata: {...} +// - event: content_block_start\ndata: {...} +// - event: content_block_delta\ndata: {...} +// - event: content_block_stop\ndata: {...} +// - event: message_delta\ndata: {...} +// - event: message_stop\ndata: {...} +// +// OpenAI SSE format: +// - data: {"id":"...","object":"chat.completion.chunk",...} +// - data: [DONE] +func ConvertKiroStreamToOpenAI(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string { + // Initialize state if needed + if *param == nil { + *param = NewOpenAIStreamState(model) + } + state := (*param).(*OpenAIStreamState) + + // Parse the Claude SSE event + responseStr := string(rawResponse) + + // Handle raw event format (event: xxx\ndata: {...}) + var eventType string + var eventData string + + if strings.HasPrefix(responseStr, "event:") { + // Parse event type and data + lines := strings.SplitN(responseStr, "\n", 2) + if len(lines) >= 1 { + eventType = strings.TrimSpace(strings.TrimPrefix(lines[0], "event:")) + } + if len(lines) >= 2 && strings.HasPrefix(lines[1], "data:") { + eventData = strings.TrimSpace(strings.TrimPrefix(lines[1], "data:")) + } + } else if strings.HasPrefix(responseStr, "data:") { + // Just data line + eventData = 
strings.TrimSpace(strings.TrimPrefix(responseStr, "data:")) + } else { + // Try to parse as raw JSON + eventData = strings.TrimSpace(responseStr) + } + + if eventData == "" { + return []string{} + } + + // Parse the event data as JSON + eventJSON := gjson.Parse(eventData) + if !eventJSON.Exists() { + return []string{} + } + + // Determine event type from JSON if not already set + if eventType == "" { + eventType = eventJSON.Get("type").String() + } + + var results []string + + switch eventType { + case "message_start": + // Send first chunk with role + firstChunk := BuildOpenAISSEFirstChunk(state) + results = append(results, firstChunk) + + case "content_block_start": + // Check block type + blockType := eventJSON.Get("content_block.type").String() + switch blockType { + case "text": + // Text block starting - nothing to emit yet + case "thinking": + // Thinking block starting - nothing to emit yet for OpenAI + case "tool_use": + // Tool use block starting + toolUseID := eventJSON.Get("content_block.id").String() + toolName := eventJSON.Get("content_block.name").String() + chunk := BuildOpenAISSEToolCallStart(state, toolUseID, toolName) + results = append(results, chunk) + state.ToolCallIndex++ + } + + case "content_block_delta": + deltaType := eventJSON.Get("delta.type").String() + switch deltaType { + case "text_delta": + textDelta := eventJSON.Get("delta.text").String() + if textDelta != "" { + chunk := BuildOpenAISSETextDelta(state, textDelta) + results = append(results, chunk) + } + case "thinking_delta": + // Convert thinking to reasoning_content for o1-style compatibility + thinkingDelta := eventJSON.Get("delta.thinking").String() + if thinkingDelta != "" { + chunk := BuildOpenAISSEReasoningDelta(state, thinkingDelta) + results = append(results, chunk) + } + case "input_json_delta": + // Tool call arguments delta + partialJSON := eventJSON.Get("delta.partial_json").String() + if partialJSON != "" { + // Get the tool index from content block index + 
blockIndex := int(eventJSON.Get("index").Int()) + chunk := BuildOpenAISSEToolCallArgumentsDelta(state, partialJSON, blockIndex-1) // Adjust for 0-based tool index + results = append(results, chunk) + } + } + + case "content_block_stop": + // Content block ended - nothing to emit for OpenAI + + case "message_delta": + // Message delta with stop_reason + stopReason := eventJSON.Get("delta.stop_reason").String() + finishReason := mapKiroStopReasonToOpenAI(stopReason) + if finishReason != "" { + chunk := BuildOpenAISSEFinish(state, finishReason) + results = append(results, chunk) + } + + // Extract usage if present + if eventJSON.Get("usage").Exists() { + inputTokens := eventJSON.Get("usage.input_tokens").Int() + outputTokens := eventJSON.Get("usage.output_tokens").Int() + usageInfo := usage.Detail{ + InputTokens: inputTokens, + OutputTokens: outputTokens, + TotalTokens: inputTokens + outputTokens, + } + chunk := BuildOpenAISSEUsage(state, usageInfo) + results = append(results, chunk) + } + + case "message_stop": + // Final event - emit [DONE] + results = append(results, BuildOpenAISSEDone()) + + case "ping": + // Ping event with usage - optionally emit usage chunk + if eventJSON.Get("usage").Exists() { + inputTokens := eventJSON.Get("usage.input_tokens").Int() + outputTokens := eventJSON.Get("usage.output_tokens").Int() + usageInfo := usage.Detail{ + InputTokens: inputTokens, + OutputTokens: outputTokens, + TotalTokens: inputTokens + outputTokens, + } + chunk := BuildOpenAISSEUsage(state, usageInfo) + results = append(results, chunk) + } + } + + return results +} + +// ConvertKiroNonStreamToOpenAI converts Kiro non-streaming response to OpenAI format. +// The Kiro executor returns Claude-compatible JSON responses, so this function translates +// from Claude format to OpenAI format. 
// ParseClaudeEvent parses a Claude SSE event and returns the event type and data.
//
// Each line is trimmed of surrounding whitespace (so "\r\n" line endings work).
// The last "event:" line wins. When the event carries several "data:" lines they
// are joined with "\n", as server-sent events define, instead of keeping only the
// last one. Lines with any other prefix are ignored.
//
// With a single "data:" line the returned slice aliases rawEvent; with several it
// is freshly allocated. eventData is nil when no data payload is present.
func ParseClaudeEvent(rawEvent []byte) (eventType string, eventData []byte) {
	var dataLines [][]byte
	for _, line := range bytes.Split(rawEvent, []byte("\n")) {
		line = bytes.TrimSpace(line)
		switch {
		case bytes.HasPrefix(line, []byte("event:")):
			eventType = string(bytes.TrimSpace(line[len("event:"):]))
		case bytes.HasPrefix(line, []byte("data:")):
			dataLines = append(dataLines, bytes.TrimSpace(line[len("data:"):]))
		}
	}

	switch len(dataLines) {
	case 0:
		return eventType, nil
	case 1:
		return eventType, dataLines[0]
	default:
		// bytes.Join allocates, so rawEvent is never written to.
		return eventType, bytes.Join(dataLines, []byte("\n"))
	}
}
// ConvertClaudeToolUseToOpenAI converts a Claude tool_use block to OpenAI tool_calls format.
//
// The result has the keys "id", "type" (always "function") and "function", the
// latter holding "name" and "arguments". "arguments" is always a JSON object
// encoded as a string: a nil input, or an input that cannot be marshalled, yields
// "{}" rather than "null" or an empty string, so clients can always decode it.
func ConvertClaudeToolUseToOpenAI(toolUseID, toolName string, input map[string]interface{}) map[string]interface{} {
	arguments := "{}"
	if input != nil {
		// A marshal failure (unsupported value inside input) keeps the "{}" default;
		// an empty object is the only safe stand-in for arguments we cannot encode.
		if encoded, err := json.Marshal(input); err == nil {
			arguments = string(encoded)
		}
	}

	return map[string]interface{}{
		"id":   toolUseID,
		"type": "function",
		"function": map[string]interface{}{
			"name":      toolName,
			"arguments": arguments,
		},
	}
}
+// It handles parsing and transforming OpenAI API requests into the Kiro/Amazon Q API format, +// extracting model information, system instructions, message contents, and tool declarations. +package openai + +import ( + "encoding/json" + "fmt" + "strings" + "time" + "unicode/utf8" + + "github.com/google/uuid" + kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common" + log "github.com/sirupsen/logrus" + "github.com/tidwall/gjson" +) + +// Kiro API request structs - reuse from kiroclaude package structure + +// KiroPayload is the top-level request structure for Kiro API +type KiroPayload struct { + ConversationState KiroConversationState `json:"conversationState"` + ProfileArn string `json:"profileArn,omitempty"` + InferenceConfig *KiroInferenceConfig `json:"inferenceConfig,omitempty"` +} + +// KiroInferenceConfig contains inference parameters for the Kiro API. +type KiroInferenceConfig struct { + MaxTokens int `json:"maxTokens,omitempty"` + Temperature float64 `json:"temperature,omitempty"` +} + +// KiroConversationState holds the conversation context +type KiroConversationState struct { + ChatTriggerType string `json:"chatTriggerType"` // Required: "MANUAL" + ConversationID string `json:"conversationId"` + CurrentMessage KiroCurrentMessage `json:"currentMessage"` + History []KiroHistoryMessage `json:"history,omitempty"` +} + +// KiroCurrentMessage wraps the current user message +type KiroCurrentMessage struct { + UserInputMessage KiroUserInputMessage `json:"userInputMessage"` +} + +// KiroHistoryMessage represents a message in the conversation history +type KiroHistoryMessage struct { + UserInputMessage *KiroUserInputMessage `json:"userInputMessage,omitempty"` + AssistantResponseMessage *KiroAssistantResponseMessage `json:"assistantResponseMessage,omitempty"` +} + +// KiroImage represents an image in Kiro API format +type KiroImage struct { + Format string `json:"format"` + Source KiroImageSource `json:"source"` +} + +// 
KiroImageSource contains the image data +type KiroImageSource struct { + Bytes string `json:"bytes"` // base64 encoded image data +} + +// KiroUserInputMessage represents a user message +type KiroUserInputMessage struct { + Content string `json:"content"` + ModelID string `json:"modelId"` + Origin string `json:"origin"` + Images []KiroImage `json:"images,omitempty"` + UserInputMessageContext *KiroUserInputMessageContext `json:"userInputMessageContext,omitempty"` +} + +// KiroUserInputMessageContext contains tool-related context +type KiroUserInputMessageContext struct { + ToolResults []KiroToolResult `json:"toolResults,omitempty"` + Tools []KiroToolWrapper `json:"tools,omitempty"` +} + +// KiroToolResult represents a tool execution result +type KiroToolResult struct { + Content []KiroTextContent `json:"content"` + Status string `json:"status"` + ToolUseID string `json:"toolUseId"` +} + +// KiroTextContent represents text content +type KiroTextContent struct { + Text string `json:"text"` +} + +// KiroToolWrapper wraps a tool specification +type KiroToolWrapper struct { + ToolSpecification KiroToolSpecification `json:"toolSpecification"` +} + +// KiroToolSpecification defines a tool's schema +type KiroToolSpecification struct { + Name string `json:"name"` + Description string `json:"description"` + InputSchema KiroInputSchema `json:"inputSchema"` +} + +// KiroInputSchema wraps the JSON schema for tool input +type KiroInputSchema struct { + JSON interface{} `json:"json"` +} + +// KiroAssistantResponseMessage represents an assistant message +type KiroAssistantResponseMessage struct { + Content string `json:"content"` + ToolUses []KiroToolUse `json:"toolUses,omitempty"` +} + +// KiroToolUse represents a tool invocation by the assistant +type KiroToolUse struct { + ToolUseID string `json:"toolUseId"` + Name string `json:"name"` + Input map[string]interface{} `json:"input"` +} + +// ConvertOpenAIRequestToKiro converts an OpenAI Chat Completions request to Kiro format. 
+// This is the main entry point for request translation. +// Note: The actual payload building happens in the executor, this just passes through +// the OpenAI format which will be converted by BuildKiroPayloadFromOpenAI. +func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bool) []byte { + // Pass through the OpenAI format - actual conversion happens in BuildKiroPayloadFromOpenAI + return inputRawJSON +} + +// BuildKiroPayloadFromOpenAI constructs the Kiro API request payload from OpenAI format. +// Supports tool calling - tools are passed via userInputMessageContext. +// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE. +// isAgentic parameter enables chunked write optimization prompt for -agentic model variants. +// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode). +func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte { + // Extract max_tokens for potential use in inferenceConfig + var maxTokens int64 + if mt := gjson.GetBytes(openaiBody, "max_tokens"); mt.Exists() { + maxTokens = mt.Int() + } + + // Extract temperature if specified + var temperature float64 + var hasTemperature bool + if temp := gjson.GetBytes(openaiBody, "temperature"); temp.Exists() { + temperature = temp.Float() + hasTemperature = true + } + + // Normalize origin value for Kiro API compatibility + origin = normalizeOrigin(origin) + log.Debugf("kiro-openai: normalized origin value: %s", origin) + + messages := gjson.GetBytes(openaiBody, "messages") + + // For chat-only mode, don't include tools + var tools gjson.Result + if !isChatOnly { + tools = gjson.GetBytes(openaiBody, "tools") + } + + // Extract system prompt from messages + systemPrompt := extractSystemPromptFromOpenAI(messages) + + // Inject timestamp context + timestamp := time.Now().Format("2006-01-02 15:04:05 MST") + timestampContext := 
fmt.Sprintf("[Context: Current time is %s]", timestamp) + if systemPrompt != "" { + systemPrompt = timestampContext + "\n\n" + systemPrompt + } else { + systemPrompt = timestampContext + } + log.Debugf("kiro-openai: injected timestamp context: %s", timestamp) + + // Inject agentic optimization prompt for -agentic model variants + if isAgentic { + if systemPrompt != "" { + systemPrompt += "\n" + } + systemPrompt += kirocommon.KiroAgenticSystemPrompt + } + + // Convert OpenAI tools to Kiro format + kiroTools := convertOpenAIToolsToKiro(tools) + + // Process messages and build history + history, currentUserMsg, currentToolResults := processOpenAIMessages(messages, modelID, origin) + + // Build content with system prompt + if currentUserMsg != nil { + currentUserMsg.Content = buildFinalContent(currentUserMsg.Content, systemPrompt, currentToolResults) + + // Deduplicate currentToolResults + currentToolResults = deduplicateToolResults(currentToolResults) + + // Build userInputMessageContext with tools and tool results + if len(kiroTools) > 0 || len(currentToolResults) > 0 { + currentUserMsg.UserInputMessageContext = &KiroUserInputMessageContext{ + Tools: kiroTools, + ToolResults: currentToolResults, + } + } + } + + // Build payload + var currentMessage KiroCurrentMessage + if currentUserMsg != nil { + currentMessage = KiroCurrentMessage{UserInputMessage: *currentUserMsg} + } else { + fallbackContent := "" + if systemPrompt != "" { + fallbackContent = "--- SYSTEM PROMPT ---\n" + systemPrompt + "\n--- END SYSTEM PROMPT ---\n" + } + currentMessage = KiroCurrentMessage{UserInputMessage: KiroUserInputMessage{ + Content: fallbackContent, + ModelID: modelID, + Origin: origin, + }} + } + + // Build inferenceConfig if we have any inference parameters + var inferenceConfig *KiroInferenceConfig + if maxTokens > 0 || hasTemperature { + inferenceConfig = &KiroInferenceConfig{} + if maxTokens > 0 { + inferenceConfig.MaxTokens = int(maxTokens) + } + if hasTemperature { + 
inferenceConfig.Temperature = temperature + } + } + + payload := KiroPayload{ + ConversationState: KiroConversationState{ + ChatTriggerType: "MANUAL", + ConversationID: uuid.New().String(), + CurrentMessage: currentMessage, + History: history, + }, + ProfileArn: profileArn, + InferenceConfig: inferenceConfig, + } + + result, err := json.Marshal(payload) + if err != nil { + log.Debugf("kiro-openai: failed to marshal payload: %v", err) + return nil + } + + return result +} + +// normalizeOrigin normalizes origin value for Kiro API compatibility +func normalizeOrigin(origin string) string { + switch origin { + case "KIRO_CLI": + return "CLI" + case "KIRO_AI_EDITOR": + return "AI_EDITOR" + case "AMAZON_Q": + return "CLI" + case "KIRO_IDE": + return "AI_EDITOR" + default: + return origin + } +} + +// extractSystemPromptFromOpenAI extracts system prompt from OpenAI messages +func extractSystemPromptFromOpenAI(messages gjson.Result) string { + if !messages.IsArray() { + return "" + } + + var systemParts []string + for _, msg := range messages.Array() { + if msg.Get("role").String() == "system" { + content := msg.Get("content") + if content.Type == gjson.String { + systemParts = append(systemParts, content.String()) + } else if content.IsArray() { + // Handle array content format + for _, part := range content.Array() { + if part.Get("type").String() == "text" { + systemParts = append(systemParts, part.Get("text").String()) + } + } + } + } + } + + return strings.Join(systemParts, "\n") +} + +// convertOpenAIToolsToKiro converts OpenAI tools to Kiro format +func convertOpenAIToolsToKiro(tools gjson.Result) []KiroToolWrapper { + var kiroTools []KiroToolWrapper + if !tools.IsArray() { + return kiroTools + } + + for _, tool := range tools.Array() { + // OpenAI tools have type "function" with function definition inside + if tool.Get("type").String() != "function" { + continue + } + + fn := tool.Get("function") + if !fn.Exists() { + continue + } + + name := 
fn.Get("name").String() + description := fn.Get("description").String() + parameters := fn.Get("parameters").Value() + + // CRITICAL FIX: Kiro API requires non-empty description + if strings.TrimSpace(description) == "" { + description = fmt.Sprintf("Tool: %s", name) + log.Debugf("kiro-openai: tool '%s' has empty description, using default: %s", name, description) + } + + // Truncate long descriptions + if len(description) > kirocommon.KiroMaxToolDescLen { + truncLen := kirocommon.KiroMaxToolDescLen - 30 + for truncLen > 0 && !utf8.RuneStart(description[truncLen]) { + truncLen-- + } + description = description[:truncLen] + "... (description truncated)" + } + + kiroTools = append(kiroTools, KiroToolWrapper{ + ToolSpecification: KiroToolSpecification{ + Name: name, + Description: description, + InputSchema: KiroInputSchema{JSON: parameters}, + }, + }) + } + + return kiroTools +} + +// processOpenAIMessages processes OpenAI messages and builds Kiro history +func processOpenAIMessages(messages gjson.Result, modelID, origin string) ([]KiroHistoryMessage, *KiroUserInputMessage, []KiroToolResult) { + var history []KiroHistoryMessage + var currentUserMsg *KiroUserInputMessage + var currentToolResults []KiroToolResult + + if !messages.IsArray() { + return history, currentUserMsg, currentToolResults + } + + // Merge adjacent messages with the same role + messagesArray := kirocommon.MergeAdjacentMessages(messages.Array()) + + // Build tool_call_id to name mapping from assistant messages + toolCallIDToName := make(map[string]string) + for _, msg := range messagesArray { + if msg.Get("role").String() == "assistant" { + toolCalls := msg.Get("tool_calls") + if toolCalls.IsArray() { + for _, tc := range toolCalls.Array() { + if tc.Get("type").String() == "function" { + id := tc.Get("id").String() + name := tc.Get("function.name").String() + if id != "" && name != "" { + toolCallIDToName[id] = name + } + } + } + } + } + } + + for i, msg := range messagesArray { + role := 
msg.Get("role").String() + isLastMessage := i == len(messagesArray)-1 + + switch role { + case "system": + // System messages are handled separately via extractSystemPromptFromOpenAI + continue + + case "user": + userMsg, toolResults := buildUserMessageFromOpenAI(msg, modelID, origin) + if isLastMessage { + currentUserMsg = &userMsg + currentToolResults = toolResults + } else { + // CRITICAL: Kiro API requires content to be non-empty for history messages + if strings.TrimSpace(userMsg.Content) == "" { + if len(toolResults) > 0 { + userMsg.Content = "Tool results provided." + } else { + userMsg.Content = "Continue" + } + } + // For history messages, embed tool results in context + if len(toolResults) > 0 { + userMsg.UserInputMessageContext = &KiroUserInputMessageContext{ + ToolResults: toolResults, + } + } + history = append(history, KiroHistoryMessage{ + UserInputMessage: &userMsg, + }) + } + + case "assistant": + assistantMsg := buildAssistantMessageFromOpenAI(msg) + if isLastMessage { + history = append(history, KiroHistoryMessage{ + AssistantResponseMessage: &assistantMsg, + }) + // Create a "Continue" user message as currentMessage + currentUserMsg = &KiroUserInputMessage{ + Content: "Continue", + ModelID: modelID, + Origin: origin, + } + } else { + history = append(history, KiroHistoryMessage{ + AssistantResponseMessage: &assistantMsg, + }) + } + + case "tool": + // Tool messages in OpenAI format provide results for tool_calls + // These are typically followed by user or assistant messages + // Process them and merge into the next user message's tool results + toolCallID := msg.Get("tool_call_id").String() + content := msg.Get("content").String() + + if toolCallID != "" { + toolResult := KiroToolResult{ + ToolUseID: toolCallID, + Content: []KiroTextContent{{Text: content}}, + Status: "success", + } + // Tool results should be included in the next user message + // For now, collect them and they'll be handled when we build the current message + 
currentToolResults = append(currentToolResults, toolResult)
+			}
+		}
+	}
+
+	return history, currentUserMsg, currentToolResults
+}
+
+// buildUserMessageFromOpenAI converts an OpenAI user message into a Kiro user
+// input message.
+//
+// String content is copied verbatim. For array content, "text" parts are
+// concatenated in order with no separator, and "image_url" parts that carry a
+// base64 data URL are converted to Kiro images; any other image URL is skipped.
+//
+// The second return value is always nil: this function never produces tool
+// results. It is retained so the signature stays unchanged for callers.
+func buildUserMessageFromOpenAI(msg gjson.Result, modelID, origin string) (KiroUserInputMessage, []KiroToolResult) {
+	content := msg.Get("content")
+	var contentBuilder strings.Builder
+	var images []KiroImage
+
+	if content.IsArray() {
+		for _, part := range content.Array() {
+			switch part.Get("type").String() {
+			case "text":
+				contentBuilder.WriteString(part.Get("text").String())
+			case "image_url":
+				if image, ok := parseOpenAIDataURLImage(part.Get("image_url.url").String()); ok {
+					images = append(images, image)
+				}
+			}
+		}
+	} else if content.Type == gjson.String {
+		contentBuilder.WriteString(content.String())
+	}
+
+	userMsg := KiroUserInputMessage{
+		Content: contentBuilder.String(),
+		ModelID: modelID,
+		Origin:  origin,
+	}
+
+	// Leave Images unset (nil) when the message carried no usable image.
+	if len(images) > 0 {
+		userMsg.Images = images
+	}
+
+	return userMsg, nil
+}
+
+// parseOpenAIDataURLImage extracts a Kiro image from a base64 data URL of the
+// form "data:<media type>[;param=value...];base64,<payload>".
+//
+// The image format is the media subtype (the text after the last "/"), with any
+// media type parameters removed. ok is false when the URL is not a base64 data
+// URL or when either the format or the payload is empty.
+func parseOpenAIDataURLImage(imageURL string) (image KiroImage, ok bool) {
+	const (
+		scheme = "data:"
+		marker = ";base64,"
+	)
+
+	if !strings.HasPrefix(imageURL, scheme) {
+		return KiroImage{}, false
+	}
+	idx := strings.Index(imageURL, marker)
+	if idx == -1 {
+		return KiroImage{}, false
+	}
+
+	mediaType := imageURL[len(scheme):idx]
+	data := imageURL[idx+len(marker):]
+
+	// A data URL may carry parameters between the media type and ";base64"
+	// (e.g. "image/svg+xml;charset=utf-8"); they are not part of the format.
+	if semi := strings.IndexByte(mediaType, ';'); semi != -1 {
+		mediaType = mediaType[:semi]
+	}
+
+	format := ""
+	if lastSlash := strings.LastIndex(mediaType, "/"); lastSlash != -1 {
+		format = mediaType[lastSlash+1:]
+	}
+	if format == "" || data == "" {
+		return KiroImage{}, false
+	}
+
+	return KiroImage{
+		Format: format,
+		Source: KiroImageSource{
+			Bytes: data,
+		},
+	}, true
+}
+
+// buildAssistantMessageFromOpenAI converts an OpenAI assistant message into a
+// Kiro assistant response message.
+//
+// Text is taken from a string content value or from the "text" parts of an
+// array content value. Every entry of "tool_calls" whose type is "function"
+// becomes a KiroToolUse; entries of any other type are skipped. The Input of a
+// tool use is always a non-nil map: arguments that fail to parse, or that are
+// the JSON literal null, yield an empty map.
+func buildAssistantMessageFromOpenAI(msg gjson.Result) KiroAssistantResponseMessage {
+	content := msg.Get("content")
+	var contentBuilder strings.Builder
+	var toolUses []KiroToolUse
+
+	// Handle content
+	if content.Type == gjson.String {
+		contentBuilder.WriteString(content.String())
+	} else if content.IsArray() {
+		for _, part := range content.Array() {
+			if part.Get("type").String() == "text" {
+				contentBuilder.WriteString(part.Get("text").String())
+			}
+		}
+	}
+
+	// Handle tool_calls
+	toolCalls := msg.Get("tool_calls")
+	if toolCalls.IsArray() {
+		for _, tc := range toolCalls.Array() {
+			if tc.Get("type").String() != "function" {
+				continue
+			}
+
+			toolUseID := tc.Get("id").String()
+			toolName := tc.Get("function.name").String()
+			toolArgs := tc.Get("function.arguments").String()
+
+			var inputMap map[string]interface{}
+			err := json.Unmarshal([]byte(toolArgs), &inputMap)
+			if err != nil {
+				log.Debugf("kiro-openai: failed to parse tool arguments: %v", err)
+			}
+			// json.Unmarshal reports no error for the literal "null" but leaves
+			// the map nil; treat that like the parse-failure case so Input is
+			// never nil.
+			if err != nil || inputMap == nil {
+				inputMap = make(map[string]interface{})
+			}
+
+			toolUses = append(toolUses, KiroToolUse{
+				ToolUseID: toolUseID,
+				Name:      toolName,
+				Input:     inputMap,
+			})
+		}
+	}
+
+	return KiroAssistantResponseMessage{
+		Content:  contentBuilder.String(),
+		ToolUses: toolUses,
+	}
+}
+
+// buildFinalContent returns the text sent as the current message content.
+//
+// A non-empty systemPrompt is prepended between "--- SYSTEM PROMPT ---" and
+// "--- END SYSTEM PROMPT ---" markers. If the combined text is blank after
+// trimming whitespace, a placeholder is returned instead: "Tool results
+// provided." when toolResults is non-empty, otherwise "Continue".
+func buildFinalContent(content, systemPrompt string, toolResults []KiroToolResult) string {
+	var contentBuilder strings.Builder
+
+	if systemPrompt != "" {
+		contentBuilder.WriteString("--- SYSTEM PROMPT ---\n")
+		contentBuilder.WriteString(systemPrompt)
+		contentBuilder.WriteString("\n--- END SYSTEM PROMPT ---\n\n")
+	}
+
+	contentBuilder.WriteString(content)
+	finalContent := contentBuilder.String()
+
+	// CRITICAL: Kiro API requires content to be non-empty
+	if strings.TrimSpace(finalContent) == "" {
+		if len(toolResults) > 0 {
+			finalContent = "Tool results provided."
+		} else {
+			finalContent = "Continue"
+		}
+		log.Debugf("kiro-openai: content was empty, using default: %s", finalContent)
+	}
+
+	return finalContent
+}
+
+// deduplicateToolResults returns toolResults with only the first entry kept for
+// each ToolUseID, preserving the original order. Dropped duplicates are logged
+// at debug level. An empty or nil input is returned unchanged.
+func deduplicateToolResults(toolResults []KiroToolResult) []KiroToolResult {
+	if len(toolResults) == 0 {
+		return toolResults
+	}
+
+	seenIDs := make(map[string]bool, len(toolResults))
+	unique := make([]KiroToolResult, 0, len(toolResults))
+	for _, tr := range toolResults {
+		if seenIDs[tr.ToolUseID] {
+			log.Debugf("kiro-openai: skipping duplicate toolResult: %s", tr.ToolUseID)
+			continue
+		}
+		seenIDs[tr.ToolUseID] = true
+		unique = append(unique, tr)
+	}
+	return unique
+}
\ No newline at end of file
diff --git a/internal/translator/kiro/openai/kiro_openai_response.go b/internal/translator/kiro/openai/kiro_openai_response.go
new file mode 100644
index 00000000..b7da1373
--- /dev/null
+++ b/internal/translator/kiro/openai/kiro_openai_response.go
@@ -0,0 +1,264 @@
+// Package openai provides response translation from Kiro to OpenAI format.
+// This package handles the conversion of Kiro API responses into OpenAI Chat Completions-compatible
+// JSON format, transforming streaming events and non-streaming responses.
+package openai
+
+import (
+	"encoding/json"
+	"fmt"
+	"sync/atomic"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
+	log "github.com/sirupsen/logrus"
+)
+
+// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
+var functionCallIDCounter uint64
+
+// BuildOpenAIResponse constructs an OpenAI Chat Completions-compatible response.
+// Supports tool_calls when tools are present in the response.
+// stopReason is passed from upstream; fallback logic applied if empty.
+func BuildOpenAIResponse(content string, toolUses []KiroToolUse, model string, usageInfo usage.Detail, stopReason string) []byte {
+	// Build the message object
+	message := map[string]interface{}{
+		"role":    "assistant",
+		"content": content,
+	}
+
+	// Add tool_calls if present
+	if len(toolUses) > 0 {
+		toolCalls := make([]map[string]interface{}, 0, len(toolUses))
+		for i, tu := range toolUses {
+			toolCalls = append(toolCalls, map[string]interface{}{
+				"id":    tu.ToolUseID,
+				"type":  "function",
+				"index": i,
+				"function": map[string]interface{}{
+					"name":      tu.Name,
+					"arguments": marshalToolArguments(tu.Input),
+				},
+			})
+		}
+		message["tool_calls"] = toolCalls
+		// When tool_calls are present, content should be null according to OpenAI spec
+		if content == "" {
+			message["content"] = nil
+		}
+	}
+
+	// Use upstream stopReason; apply fallback logic if not provided
+	finishReason := mapKiroStopReasonToOpenAI(stopReason)
+	if finishReason == "" {
+		finishReason = "stop"
+		if len(toolUses) > 0 {
+			finishReason = "tool_calls"
+		}
+		log.Debugf("kiro-openai: buildOpenAIResponse using fallback finish_reason: %s", finishReason)
+	}
+
+	response := map[string]interface{}{
+		"id":      "chatcmpl-" + uuid.New().String()[:24],
+		"object":  "chat.completion",
+		"created": time.Now().Unix(),
+		"model":   model,
+		"choices": []map[string]interface{}{
+			{
+				"index":         0,
+				"message":       message,
+				"finish_reason": finishReason,
+			},
+		},
+		"usage": buildOpenAIUsagePayload(usageInfo),
+	}
+
+	result, err := json.Marshal(response)
+	if err != nil {
+		// The result is nil in this case, as it was when the error was ignored.
+		log.Debugf("kiro-openai: failed to marshal response: %v", err)
+	}
+	return result
+}
+
+// marshalToolArguments encodes a tool input as the JSON string carried in
+// "function.arguments". A nil input, or one that cannot be encoded, yields
+// "{}" so the arguments string is always a JSON object rather than "null" or
+// empty.
+func marshalToolArguments(input interface{}) string {
+	raw, err := json.Marshal(input)
+	if err != nil {
+		log.Debugf("kiro-openai: failed to marshal tool arguments: %v", err)
+		return "{}"
+	}
+	// A nil map (or nil interface) encodes as the JSON literal null.
+	if string(raw) == "null" {
+		return "{}"
+	}
+	return string(raw)
+}
+
+// buildOpenAIUsagePayload returns the OpenAI "usage" object for usageInfo.
+// total_tokens is the sum of input and output tokens.
+func buildOpenAIUsagePayload(usageInfo usage.Detail) map[string]interface{} {
+	return map[string]interface{}{
+		"prompt_tokens":     usageInfo.InputTokens,
+		"completion_tokens": usageInfo.OutputTokens,
+		"total_tokens":      usageInfo.InputTokens + usageInfo.OutputTokens,
+	}
+}
+
+// mapKiroStopReasonToOpenAI converts Kiro/Claude stop_reason to OpenAI finish_reason.
+// Values that are not recognized, including the empty string, are returned
+// unchanged so the caller can detect "" and apply its own fallback.
+func mapKiroStopReasonToOpenAI(stopReason string) string {
+	switch stopReason {
+	case "end_turn", "stop_sequence":
+		return "stop"
+	case "tool_use":
+		return "tool_calls"
+	case "max_tokens":
+		return "length"
+	case "content_filtered":
+		return "content_filter"
+	default:
+		return stopReason
+	}
+}
+
+// marshalKiroOpenAIChunk encodes a "chat.completion.chunk" envelope around
+// choices. When usagePayload is non-nil it is attached under "usage".
+//
+// Every call generates a fresh chunk id and timestamp, so chunks produced by
+// the stateless BuildOpenAIStream* builders do not share an id; the
+// OpenAIStreamState-based builders in kiro_openai_stream.go keep one id per
+// stream.
+func marshalKiroOpenAIChunk(model string, choices []map[string]interface{}, usagePayload map[string]interface{}) []byte {
+	chunk := map[string]interface{}{
+		"id":      "chatcmpl-" + uuid.New().String()[:12],
+		"object":  "chat.completion.chunk",
+		"created": time.Now().Unix(),
+		"model":   model,
+		"choices": choices,
+	}
+	if usagePayload != nil {
+		chunk["usage"] = usagePayload
+	}
+
+	result, err := json.Marshal(chunk)
+	if err != nil {
+		// The result is nil in this case, as it was when the error was ignored.
+		log.Debugf("kiro-openai: failed to marshal stream chunk: %v", err)
+	}
+	return result
+}
+
+// BuildOpenAIStreamChunk constructs an OpenAI Chat Completions streaming chunk.
+// This is the delta format used in streaming responses.
+//
+// The delta carries the assistant role (with empty content) only when index is
+// 0 and there is neither text nor tool calls; otherwise it carries deltaContent
+// when non-empty and deltaToolCalls when non-empty. An empty finishReason is
+// emitted as a JSON null. The return value is the bare JSON object, without
+// the SSE "data: " prefix.
+func BuildOpenAIStreamChunk(model string, deltaContent string, deltaToolCalls []map[string]interface{}, finishReason string, index int) []byte {
+	delta := map[string]interface{}{}
+
+	// First chunk should include role
+	if index == 0 && deltaContent == "" && len(deltaToolCalls) == 0 {
+		delta["role"] = "assistant"
+		delta["content"] = ""
+	} else if deltaContent != "" {
+		delta["content"] = deltaContent
+	}
+
+	// Add tool_calls delta if present
+	if len(deltaToolCalls) > 0 {
+		delta["tool_calls"] = deltaToolCalls
+	}
+
+	choice := map[string]interface{}{
+		"index": 0,
+		"delta": delta,
+	}
+
+	if finishReason != "" {
+		choice["finish_reason"] = finishReason
+	} else {
+		choice["finish_reason"] = nil
+	}
+
+	return marshalKiroOpenAIChunk(model, []map[string]interface{}{choice}, nil)
+}
+
+// BuildOpenAIStreamChunkWithToolCallStart creates a stream chunk for tool call start.
+// The chunk announces the tool call id, type and function name at toolIndex
+// with empty arguments; the arguments follow in later delta chunks.
+func BuildOpenAIStreamChunkWithToolCallStart(model string, toolUseID, toolName string, toolIndex int) []byte {
+	toolCall := map[string]interface{}{
+		"index": toolIndex,
+		"id":    toolUseID,
+		"type":  "function",
+		"function": map[string]interface{}{
+			"name":      toolName,
+			"arguments": "",
+		},
+	}
+
+	choice := map[string]interface{}{
+		"index": 0,
+		"delta": map[string]interface{}{
+			"tool_calls": []map[string]interface{}{toolCall},
+		},
+		"finish_reason": nil,
+	}
+
+	return marshalKiroOpenAIChunk(model, []map[string]interface{}{choice}, nil)
+}
+
+// BuildOpenAIStreamChunkWithToolCallDelta creates a stream chunk for tool call arguments delta.
+// argumentsDelta is a fragment of the JSON arguments string for the tool call
+// at toolIndex.
+func BuildOpenAIStreamChunkWithToolCallDelta(model string, argumentsDelta string, toolIndex int) []byte {
+	toolCall := map[string]interface{}{
+		"index": toolIndex,
+		"function": map[string]interface{}{
+			"arguments": argumentsDelta,
+		},
+	}
+
+	choice := map[string]interface{}{
+		"index": 0,
+		"delta": map[string]interface{}{
+			"tool_calls": []map[string]interface{}{toolCall},
+		},
+		"finish_reason": nil,
+	}
+
+	return marshalKiroOpenAIChunk(model, []map[string]interface{}{choice}, nil)
+}
+
+// BuildOpenAIStreamDoneChunk creates the final [DONE] stream event.
+// Unlike the other builders in this file, the returned bytes already include
+// the SSE "data: " prefix.
+func BuildOpenAIStreamDoneChunk() []byte {
+	return []byte("data: [DONE]")
+}
+
+// BuildOpenAIStreamFinishChunk creates the final chunk with finish_reason.
+// The delta is empty; finishReason is emitted as given.
+func BuildOpenAIStreamFinishChunk(model string, finishReason string) []byte {
+	choice := map[string]interface{}{
+		"index":         0,
+		"delta":         map[string]interface{}{},
+		"finish_reason": finishReason,
+	}
+
+	return marshalKiroOpenAIChunk(model, []map[string]interface{}{choice}, nil)
+}
+
+// BuildOpenAIStreamUsageChunk creates a chunk with usage information (optional, for stream_options.include_usage).
+// The chunk has an empty "choices" array and a "usage" object.
+func BuildOpenAIStreamUsageChunk(model string, usageInfo usage.Detail) []byte {
+	return marshalKiroOpenAIChunk(model, []map[string]interface{}{}, buildOpenAIUsagePayload(usageInfo))
+}
+
+// GenerateToolCallID generates a unique tool call ID in OpenAI format.
+// The ID is "call_<prefix>_<unix nanoseconds>_<counter>", where prefix is at
+// most the first 8 bytes of toolName, shortened if necessary so that a
+// multi-byte UTF-8 character is never split.
+func GenerateToolCallID(toolName string) string {
+	prefixLen := min(8, len(toolName))
+	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; cutting in front
+	// of one would leave a truncated character at the end of the prefix.
+	for prefixLen > 0 && prefixLen < len(toolName) && toolName[prefixLen]&0xC0 == 0x80 {
+		prefixLen--
+	}
+	return fmt.Sprintf("call_%s_%d_%d", toolName[:prefixLen], time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1))
+}
+
+// min returns the minimum of two integers.
+// It shadows the built-in min available since Go 1.21.
+func min(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
\ No newline at end of file
diff --git a/internal/translator/kiro/openai/kiro_openai_stream.go b/internal/translator/kiro/openai/kiro_openai_stream.go
new file mode 100644
index 00000000..d550a8d8
--- /dev/null
+++ b/internal/translator/kiro/openai/kiro_openai_stream.go
@@ -0,0 +1,207 @@
+// Package openai provides streaming SSE event building for OpenAI format.
+// This package handles the construction of OpenAI-compatible Server-Sent Events (SSE)
+// for streaming responses from Kiro API.
+package openai
+
+import (
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
+)
+
+// OpenAIStreamState tracks the state of streaming response conversion.
+// One value is shared by all chunks of a single stream so that they carry the
+// same id, model and creation timestamp.
+type OpenAIStreamState struct {
+	// ChunkIndex counts the delta and finish chunks built so far; the usage
+	// chunk does not advance it.
+	ChunkIndex int
+	// ToolCallIndex is the index written into a tool call start chunk. Nothing
+	// in this file modifies it - presumably the caller advances it between
+	// tool calls (TODO confirm against the caller).
+	ToolCallIndex int
+	// HasSentFirstChunk records whether the assistant role has been emitted.
+	HasSentFirstChunk bool
+	// Model is echoed in the "model" field of every chunk.
+	Model string
+	// ResponseID is the "id" shared by every chunk of the stream.
+	ResponseID string
+	// Created is the Unix timestamp shared by every chunk of the stream.
+	Created int64
+}
+
+// NewOpenAIStreamState creates a new stream state for tracking.
+// It assigns a fresh "chatcmpl-" response id and the current time; all
+// counters start at zero.
+func NewOpenAIStreamState(model string) *OpenAIStreamState {
+	return &OpenAIStreamState{
+		Model:      model,
+		ResponseID: "chatcmpl-" + uuid.New().String()[:24],
+		Created:    time.Now().Unix(),
+	}
+}
+
+// FormatSSEEvent formats a JSON payload as an SSE event.
+// It only prepends "data: "; no line terminator is appended.
+func FormatSSEEvent(data []byte) string {
+	return fmt.Sprintf("data: %s", string(data))
+}
+
+// attachAssistantRoleOnce adds the assistant role to delta if no chunk of the
+// stream has carried it yet, and records that it has now been sent.
+func attachAssistantRoleOnce(state *OpenAIStreamState, delta map[string]interface{}) {
+	if state.HasSentFirstChunk {
+		return
+	}
+	delta["role"] = "assistant"
+	state.HasSentFirstChunk = true
+}
+
+// emitOpenAISSEChunk wraps delta in a chunk, encodes it as an SSE event and
+// advances state.ChunkIndex. A nil finishReason is emitted as JSON null.
+func emitOpenAISSEChunk(state *OpenAIStreamState, delta map[string]interface{}, finishReason *string) string {
+	chunk := buildBaseChunk(state, delta, finishReason)
+	// The chunk holds only strings, integers, nil and nested maps of those,
+	// none of which json.Marshal rejects, so the error is not checked.
+	result, _ := json.Marshal(chunk)
+	state.ChunkIndex++
+	return FormatSSEEvent(result)
+}
+
+// BuildOpenAISSETextDelta creates an SSE event for text content delta.
+// The assistant role is included if this is the first chunk of the stream.
+func BuildOpenAISSETextDelta(state *OpenAIStreamState, textDelta string) string {
+	delta := map[string]interface{}{
+		"content": textDelta,
+	}
+	attachAssistantRoleOnce(state, delta)
+	return emitOpenAISSEChunk(state, delta, nil)
+}
+
+// BuildOpenAISSEToolCallStart creates an SSE event for tool call start.
+// The tool call is announced at state.ToolCallIndex with empty arguments; the
+// assistant role is included if this is the first chunk of the stream.
+func BuildOpenAISSEToolCallStart(state *OpenAIStreamState, toolUseID, toolName string) string {
+	toolCall := map[string]interface{}{
+		"index": state.ToolCallIndex,
+		"id":    toolUseID,
+		"type":  "function",
+		"function": map[string]interface{}{
+			"name":      toolName,
+			"arguments": "",
+		},
+	}
+
+	delta := map[string]interface{}{
+		"tool_calls": []map[string]interface{}{toolCall},
+	}
+	attachAssistantRoleOnce(state, delta)
+	return emitOpenAISSEChunk(state, delta, nil)
+}
+
+// BuildOpenAISSEToolCallArgumentsDelta creates an SSE event for tool call arguments delta.
+// argumentsDelta is a fragment of the arguments string for the tool call at
+// toolIndex. The role is never added by this function.
+func BuildOpenAISSEToolCallArgumentsDelta(state *OpenAIStreamState, argumentsDelta string, toolIndex int) string {
+	toolCall := map[string]interface{}{
+		"index": toolIndex,
+		"function": map[string]interface{}{
+			"arguments": argumentsDelta,
+		},
+	}
+
+	delta := map[string]interface{}{
+		"tool_calls": []map[string]interface{}{toolCall},
+	}
+	return emitOpenAISSEChunk(state, delta, nil)
+}
+
+// BuildOpenAISSEFinish creates an SSE event with finish_reason and an empty delta.
+func BuildOpenAISSEFinish(state *OpenAIStreamState, finishReason string) string {
+	return emitOpenAISSEChunk(state, map[string]interface{}{}, &finishReason)
+}
+
+// BuildOpenAISSEUsage creates an SSE event with usage information.
+// The chunk has an empty "choices" array; total_tokens is the sum of input and
+// output tokens. state.ChunkIndex is not advanced.
+func BuildOpenAISSEUsage(state *OpenAIStreamState, usageInfo usage.Detail) string {
+	chunk := map[string]interface{}{
+		"id":      state.ResponseID,
+		"object":  "chat.completion.chunk",
+		"created": state.Created,
+		"model":   state.Model,
+		"choices": []map[string]interface{}{},
+		"usage": map[string]interface{}{
+			"prompt_tokens":     usageInfo.InputTokens,
+			"completion_tokens": usageInfo.OutputTokens,
+			"total_tokens":      usageInfo.InputTokens + usageInfo.OutputTokens,
+		},
+	}
+	// Only strings, integers and maps of those are encoded here, so the
+	// Marshal error is not checked.
+	result, _ := json.Marshal(chunk)
+	return FormatSSEEvent(result)
+}
+
+// BuildOpenAISSEDone creates the final [DONE] SSE event.
+func BuildOpenAISSEDone() string {
+	return "data: [DONE]"
+}
+
+// buildBaseChunk creates a base chunk structure for streaming.
+// The single choice has index 0 and the given delta; finish_reason is the
+// pointed-to string, or JSON null when finishReason is nil.
+func buildBaseChunk(state *OpenAIStreamState, delta map[string]interface{}, finishReason *string) map[string]interface{} {
+	choice := map[string]interface{}{
+		"index": 0,
+		"delta": delta,
+	}
+
+	if finishReason != nil {
+		choice["finish_reason"] = *finishReason
+	} else {
+		choice["finish_reason"] = nil
+	}
+
+	return map[string]interface{}{
+		"id":      state.ResponseID,
+		"object":  "chat.completion.chunk",
+		"created": state.Created,
+		"model":   state.Model,
+		"choices": []map[string]interface{}{choice},
+	}
+}
+
+// BuildOpenAISSEReasoningDelta creates an SSE event for reasoning content delta.
+// The text is emitted in the delta's "reasoning_content" field rather than in
+// "content"; the assistant role is included if this is the first chunk of the
+// stream.
+func BuildOpenAISSEReasoningDelta(state *OpenAIStreamState, reasoningDelta string) string {
+	delta := map[string]interface{}{
+		"reasoning_content": reasoningDelta,
+	}
+	attachAssistantRoleOnce(state, delta)
+	return emitOpenAISSEChunk(state, delta, nil)
+}
+
+// BuildOpenAISSEFirstChunk creates the first chunk with role only.
+// The delta carries the assistant role and an empty content string, and the
+// state is marked so later chunks do not repeat the role.
+func BuildOpenAISSEFirstChunk(state *OpenAIStreamState) string {
+	delta := map[string]interface{}{
+		"role":    "assistant",
+		"content": "",
+	}
+
+	state.HasSentFirstChunk = true
+	return emitOpenAISSEChunk(state, delta, nil)
+}
+
+// ThinkingTagState tracks state for thinking tag detection in streaming.
+// Nothing in this file reads or updates the fields; the notes below are
+// inferred from the names - TODO confirm against the code that uses them.
+type ThinkingTagState struct {
+	// InThinkingBlock presumably reports whether the stream is currently
+	// inside a thinking block.
+	InThinkingBlock bool
+	// PendingStartChars presumably counts buffered characters of a partially
+	// received opening tag.
+	PendingStartChars int
+	// PendingEndChars presumably counts buffered characters of a partially
+	// received closing tag.
+	PendingEndChars int
+}
+
+// NewThinkingTagState creates a new thinking tag state with every field at its
+// zero value.
+func NewThinkingTagState() *ThinkingTagState {
+	return &ThinkingTagState{}
+}
\ No newline at end of file