fix: handle zero output_tokens for kiro non-streaming requests

This commit is contained in:
woopencri
2026-01-28 16:27:34 +08:00
parent 8f522eed43
commit f2b0ce13d9

View File

@@ -104,13 +104,13 @@ func getGlobalFingerprintManager() *kiroauth.FingerprintManager {
// retryConfig holds configuration for socket retry logic.
// Based on kiro2Api Python implementation patterns.
type retryConfig struct {
MaxRetries int // Maximum number of retry attempts
BaseDelay time.Duration // Base delay between retries (exponential backoff)
MaxDelay time.Duration // Maximum delay cap
RetryableErrors []string // List of retryable error patterns
RetryableStatus map[int]bool // HTTP status codes to retry
FirstTokenTmout time.Duration // Timeout for first token in streaming
StreamReadTmout time.Duration // Timeout between stream chunks
MaxRetries int // Maximum number of retry attempts
BaseDelay time.Duration // Base delay between retries (exponential backoff)
MaxDelay time.Duration // Maximum delay cap
RetryableErrors []string // List of retryable error patterns
RetryableStatus map[int]bool // HTTP status codes to retry
FirstTokenTmout time.Duration // Timeout for first token in streaming
StreamReadTmout time.Duration // Timeout between stream chunks
}
// defaultRetryConfig returns the default retry configuration for Kiro socket operations.
@@ -482,12 +482,12 @@ func applyDynamicFingerprint(req *http.Request, auth *cliproxyauth.Auth) {
// Get token-specific fingerprint for dynamic UA generation
tokenKey := getTokenKey(auth)
fp := getGlobalFingerprintManager().GetFingerprint(tokenKey)
// Use fingerprint-generated dynamic User-Agent
req.Header.Set("User-Agent", fp.BuildUserAgent())
req.Header.Set("X-Amz-User-Agent", fp.BuildAmzUserAgent())
req.Header.Set("x-amzn-kiro-agent-mode", kiroIDEAgentModeSpec)
log.Debugf("kiro: using dynamic fingerprint for token %s (SDK:%s, OS:%s/%s, Kiro:%s)",
tokenKey[:8]+"...", fp.SDKVersion, fp.OSType, fp.OSVersion, fp.KiroVersion)
} else {
@@ -506,10 +506,10 @@ func (e *KiroExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth
if strings.TrimSpace(accessToken) == "" {
return statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
}
// Apply dynamic fingerprint-based headers
applyDynamicFingerprint(req, auth)
req.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
req.Header.Set("Authorization", "Bearer "+accessToken)
@@ -670,7 +670,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
// Apply dynamic fingerprint-based headers
applyDynamicFingerprint(httpReq, auth)
httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
@@ -910,30 +910,36 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
}
// Fallback for usage if missing from upstream
if usageInfo.TotalTokens == 0 {
// 1. Estimate InputTokens if missing
if usageInfo.InputTokens == 0 {
if enc, encErr := getTokenizer(req.Model); encErr == nil {
if inp, countErr := countOpenAIChatTokens(enc, opts.OriginalRequest); countErr == nil {
usageInfo.InputTokens = inp
}
}
if len(content) > 0 {
// Use tiktoken for more accurate output token calculation
if enc, encErr := getTokenizer(req.Model); encErr == nil {
if tokenCount, countErr := enc.Count(content); countErr == nil {
usageInfo.OutputTokens = int64(tokenCount)
}
}
// Fallback to character count estimation if tiktoken fails
if usageInfo.OutputTokens == 0 {
usageInfo.OutputTokens = int64(len(content) / 4)
if usageInfo.OutputTokens == 0 {
usageInfo.OutputTokens = 1
}
}
// 2. Estimate OutputTokens if missing and content is available
if usageInfo.OutputTokens == 0 && len(content) > 0 {
// Use tiktoken for more accurate output token calculation
if enc, encErr := getTokenizer(req.Model); encErr == nil {
if tokenCount, countErr := enc.Count(content); countErr == nil {
usageInfo.OutputTokens = int64(tokenCount)
}
}
// Fallback to character count estimation if tiktoken fails
if usageInfo.OutputTokens == 0 {
usageInfo.OutputTokens = int64(len(content) / 4)
if usageInfo.OutputTokens == 0 {
usageInfo.OutputTokens = 1
}
}
usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
}
// 3. Update TotalTokens
usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
appendAPIResponseChunk(ctx, e.cfg, []byte(content))
reporter.publish(ctx, usageInfo)
@@ -1079,7 +1085,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
// Apply dynamic fingerprint-based headers
applyDynamicFingerprint(httpReq, auth)
httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())