Merge pull request #2531 from jamestut/openai-vertex-token-usage-fix

Fix missing `response.completed.usage` for late-usage OpenAI-compatible streams
2026-04-12 09:14:15 +00:00 · 2026-04-06 09:30:49 +08:00
parent 9eabdd09db 65e9e892a4
commit 9f5bdfaa31
3 changed files with 279 additions and 140 deletions
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -298,6 +298,14 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 			helps.RecordAPIResponseError(ctx, e.cfg, errScan)
 			reporter.PublishFailure(ctx)
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
+		} else {
+			// In case the upstream close the stream without a terminal [DONE] marker.
+			// Feed a synthetic done marker through the translator so pending
+			// response.completed events are still emitted exactly once.
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, []byte("data: [DONE]"), &param)
+			for i := range chunks {
+				out <- cliproxyexecutor.StreamChunk{Payload: chunks[i]}
+			}
 		}
 		// Ensure we record the request if no usage chunk was ever seen
 		reporter.EnsurePublished(ctx)
--- a/internal/translator/openai/openai/responses/openai_openai-responses_response.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_response.go
@@ -20,12 +20,14 @@ type oaiToResponsesStateReasoning struct {
 	OutputIndex   int
 }
 type oaiToResponsesState struct {
-	Seq            int
-	ResponseID     string
-	Created        int64
-	Started        bool
-	ReasoningID    string
-	ReasoningIndex int
+	Seq               int
+	ResponseID        string
+	Created           int64
+	Started           bool
+	CompletionPending bool
+	CompletedEmitted  bool
+	ReasoningID       string
+	ReasoningIndex    int
 	// aggregation buffers for response.output
 	// Per-output message text buffers by index
 	MsgTextBuf   map[int]*strings.Builder
@@ -60,6 +62,141 @@ func emitRespEvent(event string, payload []byte) []byte {
 	return translatorcommon.SSEEventData(event, payload)
 }

+func buildResponsesCompletedEvent(st *oaiToResponsesState, requestRawJSON []byte, nextSeq func() int) []byte {
+	completed := []byte(`{"type":"response.completed","sequence_number":0,"response":{"id":"","object":"response","created_at":0,"status":"completed","background":false,"error":null}}`)
+	completed, _ = sjson.SetBytes(completed, "sequence_number", nextSeq())
+	completed, _ = sjson.SetBytes(completed, "response.id", st.ResponseID)
+	completed, _ = sjson.SetBytes(completed, "response.created_at", st.Created)
+	// Inject original request fields into response as per docs/response.completed.json
+	if requestRawJSON != nil {
+		req := gjson.ParseBytes(requestRawJSON)
+		if v := req.Get("instructions"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.instructions", v.String())
+		}
+		if v := req.Get("max_output_tokens"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.max_output_tokens", v.Int())
+		}
+		if v := req.Get("max_tool_calls"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.max_tool_calls", v.Int())
+		}
+		if v := req.Get("model"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.model", v.String())
+		}
+		if v := req.Get("parallel_tool_calls"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.parallel_tool_calls", v.Bool())
+		}
+		if v := req.Get("previous_response_id"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.previous_response_id", v.String())
+		}
+		if v := req.Get("prompt_cache_key"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.prompt_cache_key", v.String())
+		}
+		if v := req.Get("reasoning"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.reasoning", v.Value())
+		}
+		if v := req.Get("safety_identifier"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.safety_identifier", v.String())
+		}
+		if v := req.Get("service_tier"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.service_tier", v.String())
+		}
+		if v := req.Get("store"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.store", v.Bool())
+		}
+		if v := req.Get("temperature"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.temperature", v.Float())
+		}
+		if v := req.Get("text"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.text", v.Value())
+		}
+		if v := req.Get("tool_choice"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.tool_choice", v.Value())
+		}
+		if v := req.Get("tools"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.tools", v.Value())
+		}
+		if v := req.Get("top_logprobs"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.top_logprobs", v.Int())
+		}
+		if v := req.Get("top_p"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.top_p", v.Float())
+		}
+		if v := req.Get("truncation"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.truncation", v.String())
+		}
+		if v := req.Get("user"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.user", v.Value())
+		}
+		if v := req.Get("metadata"); v.Exists() {
+			completed, _ = sjson.SetBytes(completed, "response.metadata", v.Value())
+		}
+	}
+
+	outputsWrapper := []byte(`{"arr":[]}`)
+	type completedOutputItem struct {
+		index int
+		raw   []byte
+	}
+	outputItems := make([]completedOutputItem, 0, len(st.Reasonings)+len(st.MsgItemAdded)+len(st.FuncArgsBuf))
+	if len(st.Reasonings) > 0 {
+		for _, r := range st.Reasonings {
+			item := []byte(`{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`)
+			item, _ = sjson.SetBytes(item, "id", r.ReasoningID)
+			item, _ = sjson.SetBytes(item, "summary.0.text", r.ReasoningData)
+			outputItems = append(outputItems, completedOutputItem{index: r.OutputIndex, raw: item})
+		}
+	}
+	if len(st.MsgItemAdded) > 0 {
+		for i := range st.MsgItemAdded {
+			txt := ""
+			if b := st.MsgTextBuf[i]; b != nil {
+				txt = b.String()
+			}
+			item := []byte(`{"id":"","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":""}],"role":"assistant"}`)
+			item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("msg_%s_%d", st.ResponseID, i))
+			item, _ = sjson.SetBytes(item, "content.0.text", txt)
+			outputItems = append(outputItems, completedOutputItem{index: st.MsgOutputIx[i], raw: item})
+		}
+	}
+	if len(st.FuncArgsBuf) > 0 {
+		for key := range st.FuncArgsBuf {
+			args := ""
+			if b := st.FuncArgsBuf[key]; b != nil {
+				args = b.String()
+			}
+			callID := st.FuncCallIDs[key]
+			name := st.FuncNames[key]
+			item := []byte(`{"id":"","type":"function_call","status":"completed","arguments":"","call_id":"","name":""}`)
+			item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("fc_%s", callID))
+			item, _ = sjson.SetBytes(item, "arguments", args)
+			item, _ = sjson.SetBytes(item, "call_id", callID)
+			item, _ = sjson.SetBytes(item, "name", name)
+			outputItems = append(outputItems, completedOutputItem{index: st.FuncOutputIx[key], raw: item})
+		}
+	}
+	sort.Slice(outputItems, func(i, j int) bool { return outputItems[i].index < outputItems[j].index })
+	for _, item := range outputItems {
+		outputsWrapper, _ = sjson.SetRawBytes(outputsWrapper, "arr.-1", item.raw)
+	}
+	if gjson.GetBytes(outputsWrapper, "arr.#").Int() > 0 {
+		completed, _ = sjson.SetRawBytes(completed, "response.output", []byte(gjson.GetBytes(outputsWrapper, "arr").Raw))
+	}
+	if st.UsageSeen {
+		completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens", st.PromptTokens)
+		completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens_details.cached_tokens", st.CachedTokens)
+		completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens", st.CompletionTokens)
+		if st.ReasoningTokens > 0 {
+			completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens_details.reasoning_tokens", st.ReasoningTokens)
+		}
+		total := st.TotalTokens
+		if total == 0 {
+			total = st.PromptTokens + st.CompletionTokens
+		}
+		completed, _ = sjson.SetBytes(completed, "response.usage.total_tokens", total)
+	}
+	return emitRespEvent("response.completed", completed)
+}
+
 // ConvertOpenAIChatCompletionsResponseToOpenAIResponses converts OpenAI Chat Completions streaming chunks
 // to OpenAI Responses SSE events (response.*).
 func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) [][]byte {
@@ -90,6 +227,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
 		return [][]byte{}
 	}
 	if bytes.Equal(rawJSON, []byte("[DONE]")) {
+		if st.CompletionPending && !st.CompletedEmitted {
+			st.CompletedEmitted = true
+			return [][]byte{buildResponsesCompletedEvent(st, requestRawJSON, func() int { st.Seq++; return st.Seq })}
+		}
 		return [][]byte{}
 	}

@@ -165,6 +306,8 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
 		st.TotalTokens = 0
 		st.ReasoningTokens = 0
 		st.UsageSeen = false
+		st.CompletionPending = false
+		st.CompletedEmitted = false
 		// response.created
 		created := []byte(`{"type":"response.created","sequence_number":0,"response":{"id":"","object":"response","created_at":0,"status":"in_progress","background":false,"error":null,"output":[]}}`)
 		created, _ = sjson.SetBytes(created, "sequence_number", nextSeq())
@@ -374,8 +517,9 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
 				}
 			}

-			// finish_reason triggers finalization, including text done/content done/item done,
-			// reasoning done/part.done, function args done/item done, and completed
+			// finish_reason triggers item-level finalization. response.completed is
+			// deferred until the terminal [DONE] marker so late usage-only chunks can
+			// still populate response.usage.
 			if fr := choice.Get("finish_reason"); fr.Exists() && fr.String() != "" {
 				// Emit message done events for all indices that started a message
 				if len(st.MsgItemAdded) > 0 {
@@ -464,138 +608,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
 						st.FuncArgsDone[key] = true
 					}
 				}
-				completed := []byte(`{"type":"response.completed","sequence_number":0,"response":{"id":"","object":"response","created_at":0,"status":"completed","background":false,"error":null}}`)
-				completed, _ = sjson.SetBytes(completed, "sequence_number", nextSeq())
-				completed, _ = sjson.SetBytes(completed, "response.id", st.ResponseID)
-				completed, _ = sjson.SetBytes(completed, "response.created_at", st.Created)
-				// Inject original request fields into response as per docs/response.completed.json
-				if requestRawJSON != nil {
-					req := gjson.ParseBytes(requestRawJSON)
-					if v := req.Get("instructions"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.instructions", v.String())
-					}
-					if v := req.Get("max_output_tokens"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.max_output_tokens", v.Int())
-					}
-					if v := req.Get("max_tool_calls"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.max_tool_calls", v.Int())
-					}
-					if v := req.Get("model"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.model", v.String())
-					}
-					if v := req.Get("parallel_tool_calls"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.parallel_tool_calls", v.Bool())
-					}
-					if v := req.Get("previous_response_id"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.previous_response_id", v.String())
-					}
-					if v := req.Get("prompt_cache_key"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.prompt_cache_key", v.String())
-					}
-					if v := req.Get("reasoning"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.reasoning", v.Value())
-					}
-					if v := req.Get("safety_identifier"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.safety_identifier", v.String())
-					}
-					if v := req.Get("service_tier"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.service_tier", v.String())
-					}
-					if v := req.Get("store"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.store", v.Bool())
-					}
-					if v := req.Get("temperature"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.temperature", v.Float())
-					}
-					if v := req.Get("text"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.text", v.Value())
-					}
-					if v := req.Get("tool_choice"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.tool_choice", v.Value())
-					}
-					if v := req.Get("tools"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.tools", v.Value())
-					}
-					if v := req.Get("top_logprobs"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.top_logprobs", v.Int())
-					}
-					if v := req.Get("top_p"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.top_p", v.Float())
-					}
-					if v := req.Get("truncation"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.truncation", v.String())
-					}
-					if v := req.Get("user"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.user", v.Value())
-					}
-					if v := req.Get("metadata"); v.Exists() {
-						completed, _ = sjson.SetBytes(completed, "response.metadata", v.Value())
-					}
-				}
-				// Build response.output using aggregated buffers
-				outputsWrapper := []byte(`{"arr":[]}`)
-				type completedOutputItem struct {
-					index int
-					raw   []byte
-				}
-				outputItems := make([]completedOutputItem, 0, len(st.Reasonings)+len(st.MsgItemAdded)+len(st.FuncArgsBuf))
-				if len(st.Reasonings) > 0 {
-					for _, r := range st.Reasonings {
-						item := []byte(`{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`)
-						item, _ = sjson.SetBytes(item, "id", r.ReasoningID)
-						item, _ = sjson.SetBytes(item, "summary.0.text", r.ReasoningData)
-						outputItems = append(outputItems, completedOutputItem{index: r.OutputIndex, raw: item})
-					}
-				}
-				if len(st.MsgItemAdded) > 0 {
-					for i := range st.MsgItemAdded {
-						txt := ""
-						if b := st.MsgTextBuf[i]; b != nil {
-							txt = b.String()
-						}
-						item := []byte(`{"id":"","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":""}],"role":"assistant"}`)
-						item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("msg_%s_%d", st.ResponseID, i))
-						item, _ = sjson.SetBytes(item, "content.0.text", txt)
-						outputItems = append(outputItems, completedOutputItem{index: st.MsgOutputIx[i], raw: item})
-					}
-				}
-				if len(st.FuncArgsBuf) > 0 {
-					for key := range st.FuncArgsBuf {
-						args := ""
-						if b := st.FuncArgsBuf[key]; b != nil {
-							args = b.String()
-						}
-						callID := st.FuncCallIDs[key]
-						name := st.FuncNames[key]
-						item := []byte(`{"id":"","type":"function_call","status":"completed","arguments":"","call_id":"","name":""}`)
-						item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("fc_%s", callID))
-						item, _ = sjson.SetBytes(item, "arguments", args)
-						item, _ = sjson.SetBytes(item, "call_id", callID)
-						item, _ = sjson.SetBytes(item, "name", name)
-						outputItems = append(outputItems, completedOutputItem{index: st.FuncOutputIx[key], raw: item})
-					}
-				}
-				sort.Slice(outputItems, func(i, j int) bool { return outputItems[i].index < outputItems[j].index })
-				for _, item := range outputItems {
-					outputsWrapper, _ = sjson.SetRawBytes(outputsWrapper, "arr.-1", item.raw)
-				}
-				if gjson.GetBytes(outputsWrapper, "arr.#").Int() > 0 {
-					completed, _ = sjson.SetRawBytes(completed, "response.output", []byte(gjson.GetBytes(outputsWrapper, "arr").Raw))
-				}
-				if st.UsageSeen {
-					completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens", st.PromptTokens)
-					completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens_details.cached_tokens", st.CachedTokens)
-					completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens", st.CompletionTokens)
-					if st.ReasoningTokens > 0 {
-						completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens_details.reasoning_tokens", st.ReasoningTokens)
-					}
-					total := st.TotalTokens
-					if total == 0 {
-						total = st.PromptTokens + st.CompletionTokens
-					}
-					completed, _ = sjson.SetBytes(completed, "response.usage.total_tokens", total)
-				}
-				out = append(out, emitRespEvent("response.completed", completed))
+				st.CompletionPending = true
 			}

 			return true
--- a/internal/translator/openai/openai/responses/openai_openai-responses_response_test.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_response_test.go
@@ -24,6 +24,120 @@ func parseOpenAIResponsesSSEEvent(t *testing.T, chunk []byte) (string, gjson.Res
 	return event, gjson.Parse(dataLine)
 }

+func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_ResponseCompletedWaitsForDone(t *testing.T) {
+	t.Parallel()
+
+	request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)
+
+	tests := []struct {
+		name           string
+		in             []string
+		doneInputIndex int // Index in tt.in where the terminal [DONE] chunk arrives and response.completed must be emitted.
+		hasUsage       bool
+		inputTokens    int64
+		outputTokens   int64
+		totalTokens    int64
+	}{
+		{
+			// A provider may send finish_reason first and only attach usage in a later chunk (e.g. Vertex AI),
+			// so response.completed must wait for [DONE] to include that usage.
+			name: "late usage after finish reason",
+			in: []string{
+				`data: {"id":"resp_late_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_late_usage","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
+				`data: {"id":"resp_late_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\"}"}}]},"finish_reason":"tool_calls"}]}`,
+				`data: {"id":"resp_late_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[],"usage":{"prompt_tokens":11,"completion_tokens":7,"total_tokens":18}}`,
+				`data: [DONE]`,
+			},
+			doneInputIndex: 3,
+			hasUsage:       true,
+			inputTokens:    11,
+			outputTokens:   7,
+			totalTokens:    18,
+		},
+		{
+			// When usage arrives on the same chunk as finish_reason, we still expect a
+			// single response.completed event and it should remain deferred until [DONE].
+			name: "usage on finish reason chunk",
+			in: []string{
+				`data: {"id":"resp_usage_same_chunk","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_usage_same_chunk","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
+				`data: {"id":"resp_usage_same_chunk","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\"}"}}]},"finish_reason":"tool_calls"}],"usage":{"prompt_tokens":13,"completion_tokens":5,"total_tokens":18}}`,
+				`data: [DONE]`,
+			},
+			doneInputIndex: 2,
+			hasUsage:       true,
+			inputTokens:    13,
+			outputTokens:   5,
+			totalTokens:    18,
+		},
+		{
+			// An OpenAI-compatible streams from a buggy server might never send usage, so response.completed should
+			// still wait for [DONE] but omit the usage object entirely.
+			name: "no usage chunk",
+			in: []string{
+				`data: {"id":"resp_no_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_no_usage","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
+				`data: {"id":"resp_no_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\"}"}}]},"finish_reason":"tool_calls"}]}`,
+				`data: [DONE]`,
+			},
+			doneInputIndex: 2,
+			hasUsage:       false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			completedCount := 0
+			completedInputIndex := -1
+			var completedData gjson.Result
+
+			// Reuse converter state across input lines to simulate one streaming response.
+			var param any
+
+			for i, line := range tt.in {
+				// One upstream chunk can emit multiple downstream SSE events.
+				for _, chunk := range ConvertOpenAIChatCompletionsResponseToOpenAIResponses(context.Background(), "model", request, request, []byte(line), &param) {
+					event, data := parseOpenAIResponsesSSEEvent(t, chunk)
+					if event != "response.completed" {
+						continue
+					}
+
+					completedCount++
+					completedInputIndex = i
+					completedData = data
+					if i < tt.doneInputIndex {
+						t.Fatalf("unexpected early response.completed on input index %d", i)
+					}
+				}
+			}
+
+			if completedCount != 1 {
+				t.Fatalf("expected exactly 1 response.completed event, got %d", completedCount)
+			}
+			if completedInputIndex != tt.doneInputIndex {
+				t.Fatalf("expected response.completed on terminal [DONE] chunk at input index %d, got %d", tt.doneInputIndex, completedInputIndex)
+			}
+
+			// Missing upstream usage should stay omitted in the final completed event.
+			if !tt.hasUsage {
+				if completedData.Get("response.usage").Exists() {
+					t.Fatalf("expected response.completed to omit usage when none was provided, got %s", completedData.Get("response.usage").Raw)
+				}
+				return
+			}
+
+			// When usage is present, the final response.completed event must preserve the usage values.
+			if got := completedData.Get("response.usage.input_tokens").Int(); got != tt.inputTokens {
+				t.Fatalf("unexpected response.usage.input_tokens: got %d want %d", got, tt.inputTokens)
+			}
+			if got := completedData.Get("response.usage.output_tokens").Int(); got != tt.outputTokens {
+				t.Fatalf("unexpected response.usage.output_tokens: got %d want %d", got, tt.outputTokens)
+			}
+			if got := completedData.Get("response.usage.total_tokens").Int(); got != tt.totalTokens {
+				t.Fatalf("unexpected response.usage.total_tokens: got %d want %d", got, tt.totalTokens)
+			}
+		})
+	}
+}
+
 func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MultipleToolCallsRemainSeparate(t *testing.T) {
 	in := []string{
 		`data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_read","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
@@ -31,6 +145,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MultipleToolCalls
 		`data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":1,"id":"call_glob","type":"function","function":{"name":"glob","arguments":""}}]},"finish_reason":null}]}`,
 		`data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":1,"function":{"arguments":"{\"path\":\"C:\\\\repo\",\"pattern\":\"*.{yml,yaml}\"}"}}]},"finish_reason":null}]}`,
 		`data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`,
+		`data: [DONE]`,
 	}

 	request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)
@@ -131,6 +246,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MultiChoiceToolCa
 		`data: {"id":"resp_multi_choice","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_choice0","type":"function","function":{"name":"glob","arguments":""}}]},"finish_reason":null},{"index":1,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_choice1","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
 		`data: {"id":"resp_multi_choice","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"path\":\"C:\\\\repo\",\"pattern\":\"*.go\"}"}}]},"finish_reason":null},{"index":1,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\",\"limit\":20,\"offset\":1}"}}]},"finish_reason":null}]}`,
 		`data: {"id":"resp_multi_choice","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"},{"index":1,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`,
+		`data: [DONE]`,
 	}

 	request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)
@@ -213,6 +329,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MixedMessageAndTo
 	in := []string{
 		`data: {"id":"resp_mixed","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":"hello","reasoning_content":null,"tool_calls":null},"finish_reason":null},{"index":1,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_choice1","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
 		`data: {"id":"resp_mixed","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"stop"},{"index":1,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\",\"limit\":20,\"offset\":1}"}}]},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`,
+		`data: [DONE]`,
 	}

 	request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)
@@ -261,6 +378,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_FunctionCallDoneA
 		`data: {"id":"resp_order","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":1,"id":"call_read","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
 		`data: {"id":"resp_order","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":1,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\",\"limit\":20,\"offset\":1}"}}]},"finish_reason":null}]}`,
 		`data: {"id":"resp_order","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`,
+		`data: [DONE]`,
 	}

 	request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)