From f2b0ce13d93b5994a54e9ea221fdfa0840998784 Mon Sep 17 00:00:00 2001 From: woopencri Date: Wed, 28 Jan 2026 16:27:34 +0800 Subject: [PATCH] fix: handle zero output_tokens for kiro non-streaming requests --- internal/runtime/executor/kiro_executor.go | 62 ++++++++++++---------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/internal/runtime/executor/kiro_executor.go b/internal/runtime/executor/kiro_executor.go index 57574268..ac286e25 100644 --- a/internal/runtime/executor/kiro_executor.go +++ b/internal/runtime/executor/kiro_executor.go @@ -104,13 +104,13 @@ func getGlobalFingerprintManager() *kiroauth.FingerprintManager { // retryConfig holds configuration for socket retry logic. // Based on kiro2Api Python implementation patterns. type retryConfig struct { - MaxRetries int // Maximum number of retry attempts - BaseDelay time.Duration // Base delay between retries (exponential backoff) - MaxDelay time.Duration // Maximum delay cap - RetryableErrors []string // List of retryable error patterns - RetryableStatus map[int]bool // HTTP status codes to retry - FirstTokenTmout time.Duration // Timeout for first token in streaming - StreamReadTmout time.Duration // Timeout between stream chunks + MaxRetries int // Maximum number of retry attempts + BaseDelay time.Duration // Base delay between retries (exponential backoff) + MaxDelay time.Duration // Maximum delay cap + RetryableErrors []string // List of retryable error patterns + RetryableStatus map[int]bool // HTTP status codes to retry + FirstTokenTmout time.Duration // Timeout for first token in streaming + StreamReadTmout time.Duration // Timeout between stream chunks } // defaultRetryConfig returns the default retry configuration for Kiro socket operations. @@ -482,12 +482,12 @@ func applyDynamicFingerprint(req *http.Request, auth *cliproxyauth.Auth) { // Get token-specific fingerprint for dynamic UA generation tokenKey := getTokenKey(auth) fp := getGlobalFingerprintManager().GetFingerprint(tokenKey) - + // Use fingerprint-generated dynamic User-Agent req.Header.Set("User-Agent", fp.BuildUserAgent()) req.Header.Set("X-Amz-User-Agent", fp.BuildAmzUserAgent()) req.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec) - + log.Debugf("kiro: using dynamic fingerprint for token %s (SDK:%s, OS:%s/%s, Kiro:%s)", tokenKey[:8]+"...", fp.SDKVersion, fp.OSType, fp.OSVersion, fp.KiroVersion) } else { @@ -506,10 +506,10 @@ func (e *KiroExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth if strings.TrimSpace(accessToken) == "" { return statusErr{code: http.StatusUnauthorized, msg: "missing access token"} } - + // Apply dynamic fingerprint-based headers applyDynamicFingerprint(req, auth) - + req.Header.Set("Amz-Sdk-Request", "attempt=1; max=3") req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String()) req.Header.Set("Authorization", "Bearer "+accessToken) @@ -670,7 +670,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. // Apply dynamic fingerprint-based headers applyDynamicFingerprint(httpReq, auth) - + httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3") httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String()) @@ -910,30 +910,36 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth. } // Fallback for usage if missing from upstream - if usageInfo.TotalTokens == 0 { + + // 1. Estimate InputTokens if missing + if usageInfo.InputTokens == 0 { if enc, encErr := getTokenizer(req.Model); encErr == nil { if inp, countErr := countOpenAIChatTokens(enc, opts.OriginalRequest); countErr == nil { usageInfo.InputTokens = inp } } - if len(content) > 0 { - // Use tiktoken for more accurate output token calculation - if enc, encErr := getTokenizer(req.Model); encErr == nil { - if tokenCount, countErr := enc.Count(content); countErr == nil { - usageInfo.OutputTokens = int64(tokenCount) - } - } - // Fallback to character count estimation if tiktoken fails - if usageInfo.OutputTokens == 0 { - usageInfo.OutputTokens = int64(len(content) / 4) - if usageInfo.OutputTokens == 0 { - usageInfo.OutputTokens = 1 - } + } + + // 2. Estimate OutputTokens if missing and content is available + if usageInfo.OutputTokens == 0 && len(content) > 0 { + // Use tiktoken for more accurate output token calculation + if enc, encErr := getTokenizer(req.Model); encErr == nil { + if tokenCount, countErr := enc.Count(content); countErr == nil { + usageInfo.OutputTokens = int64(tokenCount) + } + } + // Fallback to character count estimation if tiktoken fails + if usageInfo.OutputTokens == 0 { + usageInfo.OutputTokens = int64(len(content) / 4) + if usageInfo.OutputTokens == 0 { + usageInfo.OutputTokens = 1 } } - usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens } + // 3. Update TotalTokens + usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens + appendAPIResponseChunk(ctx, e.cfg, []byte(content)) reporter.publish(ctx, usageInfo) @@ -1079,7 +1085,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox // Apply dynamic fingerprint-based headers applyDynamicFingerprint(httpReq, auth) - + httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3") httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())