From 8ce22b84039d1a760a6bc87e27733304447b455d Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 22 Nov 2025 12:50:23 +0800 Subject: [PATCH 01/10] fix(sse): preserve usage metadata for stop chunks --- internal/runtime/executor/usage_helpers.go | 44 ++++++++++------------ 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go index 501d05c5..b9913247 100644 --- a/internal/runtime/executor/usage_helpers.go +++ b/internal/runtime/executor/usage_helpers.go @@ -385,9 +385,9 @@ func jsonPayload(line []byte) []byte { return trimmed } -// FilterSSEUsageMetadata removes usageMetadata from intermediate SSE events so that -// only the terminal chunk retains token statistics. -// This function is shared between aistudio and antigravity executors. +// FilterSSEUsageMetadata removes usageMetadata from SSE events that are not +// terminal (finishReason != "stop"). Stop chunks are left untouched. This +// function is shared between aistudio and antigravity executors. func FilterSSEUsageMetadata(payload []byte) []byte { if len(payload) == 0 { return payload @@ -395,11 +395,13 @@ func FilterSSEUsageMetadata(payload []byte) []byte { lines := bytes.Split(payload, []byte("\n")) modified := false + foundData := false for idx, line := range lines { trimmed := bytes.TrimSpace(line) if len(trimmed) == 0 || !bytes.HasPrefix(trimmed, []byte("data:")) { continue } + foundData = true dataIdx := bytes.Index(line, []byte("data:")) if dataIdx < 0 { continue @@ -420,13 +422,21 @@ func FilterSSEUsageMetadata(payload []byte) []byte { modified = true } if !modified { + if !foundData { + // Handle payloads that are raw JSON without SSE data: prefix. 
+ trimmed := bytes.TrimSpace(payload) + cleaned, changed := StripUsageMetadataFromJSON(trimmed) + if !changed { + return payload + } + return cleaned + } return payload } return bytes.Join(lines, []byte("\n")) } -// StripUsageMetadataFromJSON drops usageMetadata when no finishReason is present. -// This function is shared between aistudio and antigravity executors. +// StripUsageMetadataFromJSON drops usageMetadata unless finishReason is "stop". // It handles both formats: // - Aistudio: candidates.0.finishReason // - Antigravity: response.candidates.0.finishReason @@ -441,22 +451,19 @@ func StripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) { if !finishReason.Exists() { finishReason = gjson.GetBytes(jsonBytes, "response.candidates.0.finishReason") } + stopReason := finishReason.Exists() && strings.ToLower(strings.TrimSpace(finishReason.String())) == "stop" - // If finishReason exists and is not empty, keep the usageMetadata - if finishReason.Exists() && finishReason.String() != "" { - return rawJSON, false - } - - // Check for usageMetadata in both possible locations usageMetadata := gjson.GetBytes(jsonBytes, "usageMetadata") if !usageMetadata.Exists() { usageMetadata = gjson.GetBytes(jsonBytes, "response.usageMetadata") } - if hasNonZeroUsageMetadata(usageMetadata) { + // Stop chunk: keep as-is. 
+ if stopReason { return rawJSON, false } + // Nothing to strip if !usageMetadata.Exists() { return rawJSON, false } @@ -465,13 +472,11 @@ func StripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) { cleaned := jsonBytes var changed bool - // Try to remove usageMetadata from root level if gjson.GetBytes(cleaned, "usageMetadata").Exists() { cleaned, _ = sjson.DeleteBytes(cleaned, "usageMetadata") changed = true } - // Try to remove usageMetadata from response level if gjson.GetBytes(cleaned, "response.usageMetadata").Exists() { cleaned, _ = sjson.DeleteBytes(cleaned, "response.usageMetadata") changed = true @@ -479,14 +484,3 @@ func StripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) { return cleaned, changed } - -// hasNonZeroUsageMetadata checks if any usage token counts are present. -func hasNonZeroUsageMetadata(node gjson.Result) bool { - if !node.Exists() { - return false - } - return node.Get("totalTokenCount").Int() > 0 || - node.Get("promptTokenCount").Int() > 0 || - node.Get("candidatesTokenCount").Int() > 0 || - node.Get("thoughtsTokenCount").Int() > 0 -} From b05cfd9f846d871a235a0009e77b3ea8a294a411 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 22 Nov 2025 13:03:50 +0800 Subject: [PATCH 02/10] fix(translator): include empty text chunks in responses --- .../chat-completions/antigravity_openai_response.go | 12 ------------ .../chat-completions/gemini-cli_openai_response.go | 12 ------------ 2 files changed, 24 deletions(-) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go index cd6f4043..c70daaf2 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go @@ -98,7 +98,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ 
string, originalReq // Process the main content part of the response. partsResult := gjson.GetBytes(rawJSON, "response.candidates.0.content.parts") hasFunctionCall := false - hasValidContent := false if partsResult.IsArray() { partResults := partsResult.Array() for i := 0; i < len(partResults); i++ { @@ -119,10 +118,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq if partTextResult.Exists() { textContent := partTextResult.String() - // Skip empty text content to avoid generating unnecessary chunks - if textContent == "" { - continue - } // Handle text content, distinguishing between regular content and reasoning/thoughts. if partResult.Get("thought").Bool() { @@ -131,7 +126,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq template, _ = sjson.Set(template, "choices.0.delta.content", textContent) } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") - hasValidContent = true } else if functionCallResult.Exists() { // Handle function call content. 
hasFunctionCall = true @@ -191,12 +185,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls") } - // Only return a chunk if there's actual content or a finish reason - finishReason := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason") - if !hasValidContent && !finishReason.Exists() { - return []string{} - } - return []string{template} } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go index 86699c7e..73df7a7c 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go @@ -98,7 +98,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ // Process the main content part of the response. partsResult := gjson.GetBytes(rawJSON, "response.candidates.0.content.parts") hasFunctionCall := false - hasValidContent := false if partsResult.IsArray() { partResults := partsResult.Array() for i := 0; i < len(partResults); i++ { @@ -119,10 +118,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ if partTextResult.Exists() { textContent := partTextResult.String() - // Skip empty text content to avoid generating unnecessary chunks - if textContent == "" { - continue - } // Handle text content, distinguishing between regular content and reasoning/thoughts. if partResult.Get("thought").Bool() { @@ -131,7 +126,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ template, _ = sjson.Set(template, "choices.0.delta.content", textContent) } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") - hasValidContent = true } else if functionCallResult.Exists() { // Handle function call content. 
hasFunctionCall = true @@ -191,12 +185,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls") } - // Only return a chunk if there's actual content or a finish reason - finishReason := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason") - if !hasValidContent && !finishReason.Exists() { - return []string{} - } - return []string{template} } From c29931e09387ae12011167c98206128189f1d812 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 22 Nov 2025 13:09:16 +0800 Subject: [PATCH 03/10] fix(translator): ignore empty JSON chunks in OpenAI responses --- .../openai/chat-completions/antigravity_openai_response.go | 4 ++++ .../openai/chat-completions/gemini-cli_openai_response.go | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go index c70daaf2..99c327b5 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go @@ -45,6 +45,10 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq } } + if len(bytes.TrimSpace(rawJSON)) == 0 { + return []string{} + } + if bytes.Equal(rawJSON, []byte("[DONE]")) { return []string{} } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go index 73df7a7c..7cb49a68 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go @@ -45,6 +45,10 @@ func ConvertCliResponseToOpenAI(_ context.Context, 
_ string, originalRequestRawJ } } + if len(bytes.TrimSpace(rawJSON)) == 0 { + return []string{} + } + if bytes.Equal(rawJSON, []byte("[DONE]")) { return []string{} } From 46b4110ff37f7ae26590018ecc56073cb4020cb7 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 22 Nov 2025 13:25:25 +0800 Subject: [PATCH 04/10] fix: preserve SSE usage metadata-only trailing chunks --- internal/runtime/executor/usage_helpers.go | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go index b9913247..94fc1cb1 100644 --- a/internal/runtime/executor/usage_helpers.go +++ b/internal/runtime/executor/usage_helpers.go @@ -385,6 +385,8 @@ func jsonPayload(line []byte) []byte { return trimmed } +var stopChunkWithoutUsage sync.Map + // FilterSSEUsageMetadata removes usageMetadata from SSE events that are not // terminal (finishReason != "stop"). Stop chunks are left untouched. This // function is shared between aistudio and antigravity executors. 
@@ -407,6 +409,20 @@ func FilterSSEUsageMetadata(payload []byte) []byte { continue } rawJSON := bytes.TrimSpace(line[dataIdx+5:]) + traceID := gjson.GetBytes(rawJSON, "traceId").String() + if isStopChunkWithoutUsage(rawJSON) && traceID != "" { + stopChunkWithoutUsage.Store(traceID, true) + continue + } + if traceID != "" { + if v, ok := stopChunkWithoutUsage.Load(traceID); ok { + if keep, _ := v.(bool); keep && hasUsageMetadata(rawJSON) { + stopChunkWithoutUsage.Delete(traceID) + continue + } + } + } + cleaned, changed := StripUsageMetadataFromJSON(rawJSON) if !changed { continue @@ -484,3 +500,30 @@ func StripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) { return cleaned, changed } + +func hasUsageMetadata(jsonBytes []byte) bool { + if len(jsonBytes) == 0 || !gjson.ValidBytes(jsonBytes) { + return false + } + if gjson.GetBytes(jsonBytes, "usageMetadata").Exists() { + return true + } + if gjson.GetBytes(jsonBytes, "response.usageMetadata").Exists() { + return true + } + return false +} + +func isStopChunkWithoutUsage(jsonBytes []byte) bool { + if len(jsonBytes) == 0 || !gjson.ValidBytes(jsonBytes) { + return false + } + finishReason := gjson.GetBytes(jsonBytes, "candidates.0.finishReason") + if !finishReason.Exists() { + finishReason = gjson.GetBytes(jsonBytes, "response.candidates.0.finishReason") + } + if !finishReason.Exists() || strings.ToLower(strings.TrimSpace(finishReason.String())) != "stop" { + return false + } + return !hasUsageMetadata(jsonBytes) +} From 1061354b2f52dfd4a0664dc4af22c64e52b726a9 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 22 Nov 2025 13:49:23 +0800 Subject: [PATCH 05/10] fix: handle empty and non-JSON SSE chunks safely --- internal/runtime/executor/antigravity_executor.go | 7 ++++++- .../openai/chat-completions/antigravity_openai_response.go | 4 ---- .../openai/chat-completions/gemini-cli_openai_response.go | 4 ---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git 
a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 726b5202..477828d6 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -172,7 +172,12 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya // Only retain usage statistics in the terminal chunk line = FilterSSEUsageMetadata(line) - chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(line), &param) + payload := jsonPayload(line) + if payload == nil { + continue + } + + chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param) for i := range chunks { out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])} } diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go index 99c327b5..c70daaf2 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go @@ -45,10 +45,6 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq } } - if len(bytes.TrimSpace(rawJSON)) == 0 { - return []string{} - } - if bytes.Equal(rawJSON, []byte("[DONE]")) { return []string{} } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go index 7cb49a68..73df7a7c 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go @@ -45,10 +45,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ 
string, originalRequestRawJ } } - if len(bytes.TrimSpace(rawJSON)) == 0 { - return []string{} - } - if bytes.Equal(rawJSON, []byte("[DONE]")) { return []string{} } From 19a048879cc7b409c5d1782c4b8e1990c7503180 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 22 Nov 2025 14:04:28 +0800 Subject: [PATCH 06/10] feat(runtime): track antigravity usage and token counts --- .../runtime/executor/antigravity_executor.go | 5 ++ internal/runtime/executor/usage_helpers.go | 51 +++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 477828d6..9fb591fa 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -104,6 +104,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au return resp, err } + reporter.publish(ctx, parseAntigravityUsage(bodyBytes)) var param any converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param) resp = cliproxyexecutor.Response{Payload: []byte(converted)} @@ -177,6 +178,10 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya continue } + if detail, ok := parseAntigravityStreamUsage(payload); ok { + reporter.publish(ctx, detail) + } + chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param) for i := range chunks { out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])} diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go index 94fc1cb1..d262acbe 100644 --- a/internal/runtime/executor/usage_helpers.go +++ b/internal/runtime/executor/usage_helpers.go @@ -365,6 +365,57 @@ func parseGeminiCLIStreamUsage(line []byte) (usage.Detail, bool) { return detail, true } 
+func parseAntigravityUsage(data []byte) usage.Detail { + usageNode := gjson.ParseBytes(data) + node := usageNode.Get("response.usageMetadata") + if !node.Exists() { + node = usageNode.Get("usageMetadata") + } + if !node.Exists() { + node = usageNode.Get("usage_metadata") + } + if !node.Exists() { + return usage.Detail{} + } + detail := usage.Detail{ + InputTokens: node.Get("promptTokenCount").Int(), + OutputTokens: node.Get("candidatesTokenCount").Int(), + ReasoningTokens: node.Get("thoughtsTokenCount").Int(), + TotalTokens: node.Get("totalTokenCount").Int(), + } + if detail.TotalTokens == 0 { + detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens + } + return detail +} + +func parseAntigravityStreamUsage(line []byte) (usage.Detail, bool) { + payload := jsonPayload(line) + if len(payload) == 0 || !gjson.ValidBytes(payload) { + return usage.Detail{}, false + } + node := gjson.GetBytes(payload, "response.usageMetadata") + if !node.Exists() { + node = gjson.GetBytes(payload, "usageMetadata") + } + if !node.Exists() { + node = gjson.GetBytes(payload, "usage_metadata") + } + if !node.Exists() { + return usage.Detail{}, false + } + detail := usage.Detail{ + InputTokens: node.Get("promptTokenCount").Int(), + OutputTokens: node.Get("candidatesTokenCount").Int(), + ReasoningTokens: node.Get("thoughtsTokenCount").Int(), + TotalTokens: node.Get("totalTokenCount").Int(), + } + if detail.TotalTokens == 0 { + detail.TotalTokens = detail.InputTokens + detail.OutputTokens + detail.ReasoningTokens + } + return detail, true +} + func jsonPayload(line []byte) []byte { trimmed := bytes.TrimSpace(line) if len(trimmed) == 0 { From 8356b353202e5d1c311c55470995af0a5192039b Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 22 Nov 2025 15:27:47 +0800 Subject: [PATCH 07/10] fix(executor): expire stop chunks without usage metadata --- internal/runtime/executor/usage_helpers.go | 55 ++++++++++++---------- 1 file 
changed, 29 insertions(+), 26 deletions(-) diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go index d262acbe..de7e32ec 100644 --- a/internal/runtime/executor/usage_helpers.go +++ b/internal/runtime/executor/usage_helpers.go @@ -416,28 +416,13 @@ func parseAntigravityStreamUsage(line []byte) (usage.Detail, bool) { return detail, true } -func jsonPayload(line []byte) []byte { - trimmed := bytes.TrimSpace(line) - if len(trimmed) == 0 { - return nil - } - if bytes.Equal(trimmed, []byte("[DONE]")) { - return nil - } - if bytes.HasPrefix(trimmed, []byte("event:")) { - return nil - } - if bytes.HasPrefix(trimmed, []byte("data:")) { - trimmed = bytes.TrimSpace(trimmed[len("data:"):]) - } - if len(trimmed) == 0 || trimmed[0] != '{' { - return nil - } - return trimmed -} - var stopChunkWithoutUsage sync.Map +func rememberStopWithoutUsage(traceID string) { + stopChunkWithoutUsage.Store(traceID, struct{}{}) + time.AfterFunc(10*time.Minute, func() { stopChunkWithoutUsage.Delete(traceID) }) +} + // FilterSSEUsageMetadata removes usageMetadata from SSE events that are not // terminal (finishReason != "stop"). Stop chunks are left untouched. This // function is shared between aistudio and antigravity executors. 
@@ -462,15 +447,13 @@ func FilterSSEUsageMetadata(payload []byte) []byte { rawJSON := bytes.TrimSpace(line[dataIdx+5:]) traceID := gjson.GetBytes(rawJSON, "traceId").String() if isStopChunkWithoutUsage(rawJSON) && traceID != "" { - stopChunkWithoutUsage.Store(traceID, true) + rememberStopWithoutUsage(traceID) continue } if traceID != "" { - if v, ok := stopChunkWithoutUsage.Load(traceID); ok { - if keep, _ := v.(bool); keep && hasUsageMetadata(rawJSON) { - stopChunkWithoutUsage.Delete(traceID) - continue - } + if _, ok := stopChunkWithoutUsage.Load(traceID); ok && hasUsageMetadata(rawJSON) { + stopChunkWithoutUsage.Delete(traceID) + continue } } @@ -578,3 +561,23 @@ func isStopChunkWithoutUsage(jsonBytes []byte) bool { } return !hasUsageMetadata(jsonBytes) } + +func jsonPayload(line []byte) []byte { + trimmed := bytes.TrimSpace(line) + if len(trimmed) == 0 { + return nil + } + if bytes.Equal(trimmed, []byte("[DONE]")) { + return nil + } + if bytes.HasPrefix(trimmed, []byte("event:")) { + return nil + } + if bytes.HasPrefix(trimmed, []byte("data:")) { + trimmed = bytes.TrimSpace(trimmed[len("data:"):]) + } + if len(trimmed) == 0 || trimmed[0] != '{' { + return nil + } + return trimmed +} From d32bb9db6bb7e6560311dc3df3784016bb4be5ec Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 22 Nov 2025 15:39:46 +0800 Subject: [PATCH 08/10] fix(runtime): treat non-empty finishReason as terminal --- internal/runtime/executor/usage_helpers.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go index de7e32ec..266a300e 100644 --- a/internal/runtime/executor/usage_helpers.go +++ b/internal/runtime/executor/usage_helpers.go @@ -486,7 +486,7 @@ func FilterSSEUsageMetadata(payload []byte) []byte { return bytes.Join(lines, []byte("\n")) } -// StripUsageMetadataFromJSON drops usageMetadata unless finishReason is "stop". 
+// StripUsageMetadataFromJSON drops usageMetadata unless finishReason is present (terminal). // It handles both formats: // - Aistudio: candidates.0.finishReason // - Antigravity: response.candidates.0.finishReason @@ -501,15 +501,15 @@ func StripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) { if !finishReason.Exists() { finishReason = gjson.GetBytes(jsonBytes, "response.candidates.0.finishReason") } - stopReason := finishReason.Exists() && strings.ToLower(strings.TrimSpace(finishReason.String())) == "stop" + terminalReason := finishReason.Exists() && strings.TrimSpace(finishReason.String()) != "" usageMetadata := gjson.GetBytes(jsonBytes, "usageMetadata") if !usageMetadata.Exists() { usageMetadata = gjson.GetBytes(jsonBytes, "response.usageMetadata") } - // Stop chunk: keep as-is. - if stopReason { + // Terminal chunk: keep as-is. + if terminalReason { return rawJSON, false } @@ -556,7 +556,8 @@ func isStopChunkWithoutUsage(jsonBytes []byte) bool { if !finishReason.Exists() { finishReason = gjson.GetBytes(jsonBytes, "response.candidates.0.finishReason") } - if !finishReason.Exists() || strings.ToLower(strings.TrimSpace(finishReason.String())) != "stop" { + trimmed := strings.TrimSpace(finishReason.String()) + if !finishReason.Exists() || trimmed == "" { return false } return !hasUsageMetadata(jsonBytes) From 88e566281ec17f40830c51f2905a2d5fde72e306 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 22 Nov 2025 15:53:36 +0800 Subject: [PATCH 09/10] fix(gemini): filter SSE usage metadata in streams --- internal/runtime/executor/gemini_executor.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index a95ac03b..e4a2ce23 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -256,10 +256,11 @@ func (e *GeminiExecutor) ExecuteStream(ctx 
context.Context, auth *cliproxyauth.A for scanner.Scan() { line := scanner.Bytes() appendAPIResponseChunk(ctx, e.cfg, line) - if detail, ok := parseGeminiStreamUsage(line); ok { + filtered := FilterSSEUsageMetadata(line) + if detail, ok := parseGeminiStreamUsage(filtered); ok { reporter.publish(ctx, detail) } - lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param) + lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(filtered), &param) for i := range lines { out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} } From 166fa9e2e6d7103d07d9a0b7f93f410d3afa127b Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sat, 22 Nov 2025 16:07:12 +0800 Subject: [PATCH 10/10] fix(gemini): parse stream usage from JSON, skip thoughtSignature --- internal/runtime/executor/gemini_executor.go | 8 ++++++-- .../chat-completions/gemini_openai_response.go | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index e4a2ce23..520d6474 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -257,10 +257,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A line := scanner.Bytes() appendAPIResponseChunk(ctx, e.cfg, line) filtered := FilterSSEUsageMetadata(line) - if detail, ok := parseGeminiStreamUsage(filtered); ok { + payload := jsonPayload(filtered) + if len(payload) == 0 { + continue + } + if detail, ok := parseGeminiStreamUsage(payload); ok { reporter.publish(ctx, detail) } - lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(filtered), &param) + lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, 
bytes.Clone(opts.OriginalRequest), body, bytes.Clone(payload), &param) for i := range lines { out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go index 4ce7f005..24112f0f 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go @@ -111,13 +111,23 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR if !inlineDataResult.Exists() { inlineDataResult = partResult.Get("inline_data") } + thoughtSignatureResult := partResult.Get("thoughtSignature") + if !thoughtSignatureResult.Exists() { + thoughtSignatureResult = partResult.Get("thought_signature") + } + + // Skip thoughtSignature parts (encrypted reasoning not exposed downstream). + if thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != "" { + continue + } if partTextResult.Exists() { + text := partTextResult.String() // Handle text content, distinguishing between regular content and reasoning/thoughts. if partResult.Get("thought").Bool() { - template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", partTextResult.String()) + template, _ = sjson.Set(template, "choices.0.delta.reasoning_content", text) } else { - template, _ = sjson.Set(template, "choices.0.delta.content", partTextResult.String()) + template, _ = sjson.Set(template, "choices.0.delta.content", text) } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") } else if functionCallResult.Exists() {