diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 726b5202..9fb591fa 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -104,6 +104,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au return resp, err } + reporter.publish(ctx, parseAntigravityUsage(bodyBytes)) var param any converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, ¶m) resp = cliproxyexecutor.Response{Payload: []byte(converted)} @@ -172,7 +173,16 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya // Only retain usage statistics in the terminal chunk line = FilterSSEUsageMetadata(line) - chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(line), ¶m) + payload := jsonPayload(line) + if payload == nil { + continue + } + + if detail, ok := parseAntigravityStreamUsage(payload); ok { + reporter.publish(ctx, detail) + } + + chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), ¶m) for i := range chunks { out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])} } diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index a95ac03b..520d6474 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -256,10 +256,15 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A for scanner.Scan() { line := scanner.Bytes() appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseGeminiStreamUsage(line); ok { + filtered := FilterSSEUsageMetadata(line) + payload := jsonPayload(filtered) + if len(payload) == 0 { + continue + } + if detail, ok := parseGeminiStreamUsage(payload); ok { reporter.publish(ctx, detail) } - lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), ¶m) + lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(payload), ¶m) for i := range lines { out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} } diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go index 501d05c5..266a300e 100644 --- a/internal/runtime/executor/usage_helpers.go +++ b/internal/runtime/executor/usage_helpers.go @@ -365,6 +365,204 @@ func parseGeminiCLIStreamUsage(line []byte) (usage.Detail, bool) { return detail, true } +func parseAntigravityUsage(data []byte) usage.Detail { + usageNode := gjson.ParseBytes(data) + node := usageNode.Get("response.usageMetadata") + if !node.Exists() { + node = usageNode.Get("usageMetadata") + } + if !node.Exists() { + node = usageNode.Get("usage_metadata") + } + if !node.Exists() { + return usage.Detail{} + } + detail := usage.Detail{ + InputTokens: node.Get("promptTokenCount").Int(), + OutputTokens: node.Get("candidatesTokenCount").Int(), + ReasoningTokens: node.Get("thoughtsTokenCount").Int(), + TotalTokens: node.Get("totalTokenCount").Int(), + } + if detail.TotalTokens == 0 { + detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens + } + return detail +} + +func parseAntigravityStreamUsage(line []byte) (usage.Detail, bool) { + payload := jsonPayload(line) + if len(payload) == 0 || !gjson.ValidBytes(payload) { + return usage.Detail{}, false + } + node := gjson.GetBytes(payload, "response.usageMetadata") + if !node.Exists() { + node = gjson.GetBytes(payload, "usageMetadata") + } + if !node.Exists() { + node = gjson.GetBytes(payload, "usage_metadata") + } + if !node.Exists() { + return usage.Detail{}, false + } + detail := usage.Detail{ + InputTokens: node.Get("promptTokenCount").Int(), + OutputTokens: node.Get("candidatesTokenCount").Int(), + ReasoningTokens: node.Get("thoughtsTokenCount").Int(), + TotalTokens: node.Get("totalTokenCount").Int(), + } + if detail.TotalTokens == 0 { + detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens + } + return detail, true +} + +var stopChunkWithoutUsage sync.Map + +func rememberStopWithoutUsage(traceID string) { + stopChunkWithoutUsage.Store(traceID, struct{}{}) + time.AfterFunc(10*time.Minute, func() { stopChunkWithoutUsage.Delete(traceID) }) +} + +// FilterSSEUsageMetadata removes usageMetadata from SSE events that are not +// terminal (finishReason != "stop"). Stop chunks are left untouched. This +// function is shared between aistudio and antigravity executors. +func FilterSSEUsageMetadata(payload []byte) []byte { + if len(payload) == 0 { + return payload + } + + lines := bytes.Split(payload, []byte("\n")) + modified := false + foundData := false + for idx, line := range lines { + trimmed := bytes.TrimSpace(line) + if len(trimmed) == 0 || !bytes.HasPrefix(trimmed, []byte("data:")) { + continue + } + foundData = true + dataIdx := bytes.Index(line, []byte("data:")) + if dataIdx < 0 { + continue + } + rawJSON := bytes.TrimSpace(line[dataIdx+5:]) + traceID := gjson.GetBytes(rawJSON, "traceId").String() + if isStopChunkWithoutUsage(rawJSON) && traceID != "" { + rememberStopWithoutUsage(traceID) + continue + } + if traceID != "" { + if _, ok := stopChunkWithoutUsage.Load(traceID); ok && hasUsageMetadata(rawJSON) { + stopChunkWithoutUsage.Delete(traceID) + continue + } + } + + cleaned, changed := StripUsageMetadataFromJSON(rawJSON) + if !changed { + continue + } + var rebuilt []byte + rebuilt = append(rebuilt, line[:dataIdx]...) + rebuilt = append(rebuilt, []byte("data:")...) + if len(cleaned) > 0 { + rebuilt = append(rebuilt, ' ') + rebuilt = append(rebuilt, cleaned...) + } + lines[idx] = rebuilt + modified = true + } + if !modified { + if !foundData { + // Handle payloads that are raw JSON without SSE data: prefix. + trimmed := bytes.TrimSpace(payload) + cleaned, changed := StripUsageMetadataFromJSON(trimmed) + if !changed { + return payload + } + return cleaned + } + return payload + } + return bytes.Join(lines, []byte("\n")) +} + +// StripUsageMetadataFromJSON drops usageMetadata unless finishReason is present (terminal). +// It handles both formats: +// - Aistudio: candidates.0.finishReason +// - Antigravity: response.candidates.0.finishReason +func StripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) { + jsonBytes := bytes.TrimSpace(rawJSON) + if len(jsonBytes) == 0 || !gjson.ValidBytes(jsonBytes) { + return rawJSON, false + } + + // Check for finishReason in both aistudio and antigravity formats + finishReason := gjson.GetBytes(jsonBytes, "candidates.0.finishReason") + if !finishReason.Exists() { + finishReason = gjson.GetBytes(jsonBytes, "response.candidates.0.finishReason") + } + terminalReason := finishReason.Exists() && strings.TrimSpace(finishReason.String()) != "" + + usageMetadata := gjson.GetBytes(jsonBytes, "usageMetadata") + if !usageMetadata.Exists() { + usageMetadata = gjson.GetBytes(jsonBytes, "response.usageMetadata") + } + + // Terminal chunk: keep as-is. + if terminalReason { + return rawJSON, false + } + + // Nothing to strip + if !usageMetadata.Exists() { + return rawJSON, false + } + + // Remove usageMetadata from both possible locations + cleaned := jsonBytes + var changed bool + + if gjson.GetBytes(cleaned, "usageMetadata").Exists() { + cleaned, _ = sjson.DeleteBytes(cleaned, "usageMetadata") + changed = true + } + + if gjson.GetBytes(cleaned, "response.usageMetadata").Exists() { + cleaned, _ = sjson.DeleteBytes(cleaned, "response.usageMetadata") + changed = true + } + + return cleaned, changed +} + +func hasUsageMetadata(jsonBytes []byte) bool { + if len(jsonBytes) == 0 || !gjson.ValidBytes(jsonBytes) { + return false + } + if gjson.GetBytes(jsonBytes, "usageMetadata").Exists() { + return true + } + if gjson.GetBytes(jsonBytes, "response.usageMetadata").Exists() { + return true + } + return false +} + +func isStopChunkWithoutUsage(jsonBytes []byte) bool { + if len(jsonBytes) == 0 || !gjson.ValidBytes(jsonBytes) { + return false + } + finishReason := gjson.GetBytes(jsonBytes, "candidates.0.finishReason") + if !finishReason.Exists() { + finishReason = gjson.GetBytes(jsonBytes, "response.candidates.0.finishReason") + } + trimmed := strings.TrimSpace(finishReason.String()) + if !finishReason.Exists() || trimmed == "" { + return false + } + return !hasUsageMetadata(jsonBytes) +} + func jsonPayload(line []byte) []byte { trimmed := bytes.TrimSpace(line) if len(trimmed) == 0 { @@ -384,109 +582,3 @@ func jsonPayload(line []byte) []byte { } return trimmed } - -// FilterSSEUsageMetadata removes usageMetadata from intermediate SSE events so that -// only the terminal chunk retains token statistics. -// This function is shared between aistudio and antigravity executors. -func FilterSSEUsageMetadata(payload []byte) []byte { - if len(payload) == 0 { - return payload - } - - lines := bytes.Split(payload, []byte("\n")) - modified := false - for idx, line := range lines { - trimmed := bytes.TrimSpace(line) - if len(trimmed) == 0 || !bytes.HasPrefix(trimmed, []byte("data:")) { - continue - } - dataIdx := bytes.Index(line, []byte("data:")) - if dataIdx < 0 { - continue - } - rawJSON := bytes.TrimSpace(line[dataIdx+5:]) - cleaned, changed := StripUsageMetadataFromJSON(rawJSON) - if !changed { - continue - } - var rebuilt []byte - rebuilt = append(rebuilt, line[:dataIdx]...) - rebuilt = append(rebuilt, []byte("data:")...) - if len(cleaned) > 0 { - rebuilt = append(rebuilt, ' ') - rebuilt = append(rebuilt, cleaned...) - } - lines[idx] = rebuilt - modified = true - } - if !modified { - return payload - } - return bytes.Join(lines, []byte("\n")) -} - -// StripUsageMetadataFromJSON drops usageMetadata when no finishReason is present. -// This function is shared between aistudio and antigravity executors. -// It handles both formats: -// - Aistudio: candidates.0.finishReason -// - Antigravity: response.candidates.0.finishReason -func StripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) { - jsonBytes := bytes.TrimSpace(rawJSON) - if len(jsonBytes) == 0 || !gjson.ValidBytes(jsonBytes) { - return rawJSON, false - } - - // Check for finishReason in both aistudio and antigravity formats - finishReason := gjson.GetBytes(jsonBytes, "candidates.0.finishReason") - if !finishReason.Exists() { - finishReason = gjson.GetBytes(jsonBytes, "response.candidates.0.finishReason") - } - - // If finishReason exists and is not empty, keep the usageMetadata - if finishReason.Exists() && finishReason.String() != "" { - return rawJSON, false - } - - // Check for usageMetadata in both possible locations - usageMetadata := gjson.GetBytes(jsonBytes, "usageMetadata") - if !usageMetadata.Exists() { - usageMetadata = gjson.GetBytes(jsonBytes, "response.usageMetadata") - } - - if hasNonZeroUsageMetadata(usageMetadata) { - return rawJSON, false - } - - if !usageMetadata.Exists() { - return rawJSON, false - } - - // Remove usageMetadata from both possible locations - cleaned := jsonBytes - var changed bool - - // Try to remove usageMetadata from root level - if gjson.GetBytes(cleaned, "usageMetadata").Exists() { - cleaned, _ = sjson.DeleteBytes(cleaned, "usageMetadata") - changed = true - } - - // Try to remove usageMetadata from response level - if gjson.GetBytes(cleaned, "response.usageMetadata").Exists() { - cleaned, _ = sjson.DeleteBytes(cleaned, "response.usageMetadata") - changed = true - } - - return cleaned, changed -} - -// hasNonZeroUsageMetadata checks if any usage token counts are present. -func hasNonZeroUsageMetadata(node gjson.Result) bool { - if !node.Exists() { - return false - } - return node.Get("totalTokenCount").Int() > 0 || - node.Get("promptTokenCount").Int() > 0 || - node.Get("candidatesTokenCount").Int() > 0 || - node.Get("thoughtsTokenCount").Int() > 0 -} diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go index cd6f4043..c70daaf2 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go @@ -98,7 +98,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq // Process the main content part of the response. partsResult := gjson.GetBytes(rawJSON, "response.candidates.0.content.parts") hasFunctionCall := false - hasValidContent := false if partsResult.IsArray() { partResults := partsResult.Array() for i := 0; i < len(partResults); i++ { @@ -119,10 +118,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq if partTextResult.Exists() { textContent := partTextResult.String() - // Skip empty text content to avoid generating unnecessary chunks - if textContent == "" { - continue - } // Handle text content, distinguishing between regular content and reasoning/thoughts. if partResult.Get("thought").Bool() { @@ -131,7 +126,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq template, _ = sjson.Set(template, "choices.0.delta.content", textContent) } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") - hasValidContent = true } else if functionCallResult.Exists() { // Handle function call content. hasFunctionCall = true @@ -191,12 +185,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls") } - // Only return a chunk if there's actual content or a finish reason - finishReason := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason") - if !hasValidContent && !finishReason.Exists() { - return []string{} - } - return []string{template} } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go index 86699c7e..73df7a7c 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go @@ -98,7 +98,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ // Process the main content part of the response. partsResult := gjson.GetBytes(rawJSON, "response.candidates.0.content.parts") hasFunctionCall := false - hasValidContent := false if partsResult.IsArray() { partResults := partsResult.Array() for i := 0; i < len(partResults); i++ { @@ -119,10 +118,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ if partTextResult.Exists() { textContent := partTextResult.String() - // Skip empty text content to avoid generating unnecessary chunks - if textContent == "" { - continue - } // Handle text content, distinguishing between regular content and reasoning/thoughts. if partResult.Get("thought").Bool() { @@ -131,7 +126,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ template, _ = sjson.Set(template, "choices.0.delta.content", textContent) } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") - hasValidContent = true } else if functionCallResult.Exists() { // Handle function call content. hasFunctionCall = true @@ -191,12 +185,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls") } - // Only return a chunk if there's actual content or a finish reason - finishReason := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason") - if !hasValidContent && !finishReason.Exists() { - return []string{} - } - return []string{template} } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go index 4ce7f005..24112f0f 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go @@ -111,13 +111,23 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR if !inlineDataResult.Exists() { inlineDataResult = partResult.Get("inline_data") } + thoughtSignatureResult := partResult.Get("thoughtSignature") + if !thoughtSignatureResult.Exists() { + thoughtSignatureResult = partResult.Get("thought_signature") + } + + // Skip thoughtSignature parts (encrypted reasoning not exposed downstream). + if thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != "" { + continue + } if partTextResult.Exists() { + text := partTextResult.String() // Handle text content, distinguishing between regular content and reasoning/thoughts. if partResult.Get("thought").Bool() { - template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", partTextResult.String()) + template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", text) } else { - template, _ = sjson.Set(template, "choices.0.delta.content", partTextResult.String()) + template, _ = sjson.Set(template, "choices.0.delta.content", text) } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") } else if functionCallResult.Exists() {