Merge pull request #2531 from jamestut/openai-vertex-token-usage-fix

Fix missing `response.completed.usage` for late-usage OpenAI-compatible streams
This commit is contained in:
Luis Pater
2026-04-06 09:30:49 +08:00
committed by GitHub
3 changed files with 279 additions and 140 deletions

View File

@@ -298,6 +298,14 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
helps.RecordAPIResponseError(ctx, e.cfg, errScan)
reporter.PublishFailure(ctx)
out <- cliproxyexecutor.StreamChunk{Err: errScan}
} else {
// The upstream may close the stream without sending a terminal [DONE] marker.
// In that case, feed a synthetic [DONE] marker through the translator so that
// any pending response.completed event is still emitted exactly once.
chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, []byte("data: [DONE]"), &param)
for i := range chunks {
out <- cliproxyexecutor.StreamChunk{Payload: chunks[i]}
}
}
// Ensure we record the request if no usage chunk was ever seen
reporter.EnsurePublished(ctx)