diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index a03e4987..7f202055 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -298,6 +298,14 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy helps.RecordAPIResponseError(ctx, e.cfg, errScan) reporter.PublishFailure(ctx) out <- cliproxyexecutor.StreamChunk{Err: errScan} + } else { + // In case the upstream closes the stream without a terminal [DONE] marker. + // Feed a synthetic done marker through the translator so pending + // response.completed events are still emitted exactly once. + chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, []byte("data: [DONE]"), &param) + for i := range chunks { + out <- cliproxyexecutor.StreamChunk{Payload: chunks[i]} + } + } } // Ensure we record the request if no usage chunk was ever seen reporter.EnsurePublished(ctx) diff --git a/internal/translator/openai/openai/responses/openai_openai-responses_response.go b/internal/translator/openai/openai/responses/openai_openai-responses_response.go index a34a6ff4..8a44aede 100644 --- a/internal/translator/openai/openai/responses/openai_openai-responses_response.go +++ b/internal/translator/openai/openai/responses/openai_openai-responses_response.go @@ -20,12 +20,14 @@ type oaiToResponsesStateReasoning struct { OutputIndex int } type oaiToResponsesState struct { - Seq int - ResponseID string - Created int64 - Started bool - ReasoningID string - ReasoningIndex int + Seq int + ResponseID string + Created int64 + Started bool + CompletionPending bool + CompletedEmitted bool + ReasoningID string + ReasoningIndex int // aggregation buffers for response.output // Per-output message text buffers by index MsgTextBuf map[int]*strings.Builder @@ -60,6 +62,141 @@ func emitRespEvent(event string, payload []byte) []byte { return
translatorcommon.SSEEventData(event, payload) } +func buildResponsesCompletedEvent(st *oaiToResponsesState, requestRawJSON []byte, nextSeq func() int) []byte { + completed := []byte(`{"type":"response.completed","sequence_number":0,"response":{"id":"","object":"response","created_at":0,"status":"completed","background":false,"error":null}}`) + completed, _ = sjson.SetBytes(completed, "sequence_number", nextSeq()) + completed, _ = sjson.SetBytes(completed, "response.id", st.ResponseID) + completed, _ = sjson.SetBytes(completed, "response.created_at", st.Created) + // Inject original request fields into response as per docs/response.completed.json + if requestRawJSON != nil { + req := gjson.ParseBytes(requestRawJSON) + if v := req.Get("instructions"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.instructions", v.String()) + } + if v := req.Get("max_output_tokens"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.max_output_tokens", v.Int()) + } + if v := req.Get("max_tool_calls"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.max_tool_calls", v.Int()) + } + if v := req.Get("model"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.model", v.String()) + } + if v := req.Get("parallel_tool_calls"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.parallel_tool_calls", v.Bool()) + } + if v := req.Get("previous_response_id"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.previous_response_id", v.String()) + } + if v := req.Get("prompt_cache_key"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.prompt_cache_key", v.String()) + } + if v := req.Get("reasoning"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.reasoning", v.Value()) + } + if v := req.Get("safety_identifier"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.safety_identifier", v.String()) + } + if v := req.Get("service_tier"); v.Exists() { + 
completed, _ = sjson.SetBytes(completed, "response.service_tier", v.String()) + } + if v := req.Get("store"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.store", v.Bool()) + } + if v := req.Get("temperature"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.temperature", v.Float()) + } + if v := req.Get("text"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.text", v.Value()) + } + if v := req.Get("tool_choice"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.tool_choice", v.Value()) + } + if v := req.Get("tools"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.tools", v.Value()) + } + if v := req.Get("top_logprobs"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.top_logprobs", v.Int()) + } + if v := req.Get("top_p"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.top_p", v.Float()) + } + if v := req.Get("truncation"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.truncation", v.String()) + } + if v := req.Get("user"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.user", v.Value()) + } + if v := req.Get("metadata"); v.Exists() { + completed, _ = sjson.SetBytes(completed, "response.metadata", v.Value()) + } + } + + outputsWrapper := []byte(`{"arr":[]}`) + type completedOutputItem struct { + index int + raw []byte + } + outputItems := make([]completedOutputItem, 0, len(st.Reasonings)+len(st.MsgItemAdded)+len(st.FuncArgsBuf)) + if len(st.Reasonings) > 0 { + for _, r := range st.Reasonings { + item := []byte(`{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`) + item, _ = sjson.SetBytes(item, "id", r.ReasoningID) + item, _ = sjson.SetBytes(item, "summary.0.text", r.ReasoningData) + outputItems = append(outputItems, completedOutputItem{index: r.OutputIndex, raw: item}) + } + } + if len(st.MsgItemAdded) > 0 { + for i := range st.MsgItemAdded { + txt := "" + if b := 
st.MsgTextBuf[i]; b != nil { + txt = b.String() + } + item := []byte(`{"id":"","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":""}],"role":"assistant"}`) + item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("msg_%s_%d", st.ResponseID, i)) + item, _ = sjson.SetBytes(item, "content.0.text", txt) + outputItems = append(outputItems, completedOutputItem{index: st.MsgOutputIx[i], raw: item}) + } + } + if len(st.FuncArgsBuf) > 0 { + for key := range st.FuncArgsBuf { + args := "" + if b := st.FuncArgsBuf[key]; b != nil { + args = b.String() + } + callID := st.FuncCallIDs[key] + name := st.FuncNames[key] + item := []byte(`{"id":"","type":"function_call","status":"completed","arguments":"","call_id":"","name":""}`) + item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("fc_%s", callID)) + item, _ = sjson.SetBytes(item, "arguments", args) + item, _ = sjson.SetBytes(item, "call_id", callID) + item, _ = sjson.SetBytes(item, "name", name) + outputItems = append(outputItems, completedOutputItem{index: st.FuncOutputIx[key], raw: item}) + } + } + sort.Slice(outputItems, func(i, j int) bool { return outputItems[i].index < outputItems[j].index }) + for _, item := range outputItems { + outputsWrapper, _ = sjson.SetRawBytes(outputsWrapper, "arr.-1", item.raw) + } + if gjson.GetBytes(outputsWrapper, "arr.#").Int() > 0 { + completed, _ = sjson.SetRawBytes(completed, "response.output", []byte(gjson.GetBytes(outputsWrapper, "arr").Raw)) + } + if st.UsageSeen { + completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens", st.PromptTokens) + completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens_details.cached_tokens", st.CachedTokens) + completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens", st.CompletionTokens) + if st.ReasoningTokens > 0 { + completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens_details.reasoning_tokens", st.ReasoningTokens) + } + total := st.TotalTokens + if 
total == 0 { + total = st.PromptTokens + st.CompletionTokens + } + completed, _ = sjson.SetBytes(completed, "response.usage.total_tokens", total) + } + return emitRespEvent("response.completed", completed) +} + // ConvertOpenAIChatCompletionsResponseToOpenAIResponses converts OpenAI Chat Completions streaming chunks // to OpenAI Responses SSE events (response.*). func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) [][]byte { @@ -90,6 +227,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, return [][]byte{} } if bytes.Equal(rawJSON, []byte("[DONE]")) { + if st.CompletionPending && !st.CompletedEmitted { + st.CompletedEmitted = true + return [][]byte{buildResponsesCompletedEvent(st, requestRawJSON, func() int { st.Seq++; return st.Seq })} + } return [][]byte{} } @@ -165,6 +306,8 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, st.TotalTokens = 0 st.ReasoningTokens = 0 st.UsageSeen = false + st.CompletionPending = false + st.CompletedEmitted = false // response.created created := []byte(`{"type":"response.created","sequence_number":0,"response":{"id":"","object":"response","created_at":0,"status":"in_progress","background":false,"error":null,"output":[]}}`) created, _ = sjson.SetBytes(created, "sequence_number", nextSeq()) @@ -374,8 +517,9 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, } } - // finish_reason triggers finalization, including text done/content done/item done, - // reasoning done/part.done, function args done/item done, and completed + // finish_reason triggers item-level finalization. response.completed is + // deferred until the terminal [DONE] marker so late usage-only chunks can + // still populate response.usage. 
if fr := choice.Get("finish_reason"); fr.Exists() && fr.String() != "" { // Emit message done events for all indices that started a message if len(st.MsgItemAdded) > 0 { @@ -464,138 +608,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, st.FuncArgsDone[key] = true } } - completed := []byte(`{"type":"response.completed","sequence_number":0,"response":{"id":"","object":"response","created_at":0,"status":"completed","background":false,"error":null}}`) - completed, _ = sjson.SetBytes(completed, "sequence_number", nextSeq()) - completed, _ = sjson.SetBytes(completed, "response.id", st.ResponseID) - completed, _ = sjson.SetBytes(completed, "response.created_at", st.Created) - // Inject original request fields into response as per docs/response.completed.json - if requestRawJSON != nil { - req := gjson.ParseBytes(requestRawJSON) - if v := req.Get("instructions"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.instructions", v.String()) - } - if v := req.Get("max_output_tokens"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.max_output_tokens", v.Int()) - } - if v := req.Get("max_tool_calls"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.max_tool_calls", v.Int()) - } - if v := req.Get("model"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.model", v.String()) - } - if v := req.Get("parallel_tool_calls"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.parallel_tool_calls", v.Bool()) - } - if v := req.Get("previous_response_id"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.previous_response_id", v.String()) - } - if v := req.Get("prompt_cache_key"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.prompt_cache_key", v.String()) - } - if v := req.Get("reasoning"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.reasoning", v.Value()) - } - if v := req.Get("safety_identifier"); v.Exists() 
{ - completed, _ = sjson.SetBytes(completed, "response.safety_identifier", v.String()) - } - if v := req.Get("service_tier"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.service_tier", v.String()) - } - if v := req.Get("store"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.store", v.Bool()) - } - if v := req.Get("temperature"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.temperature", v.Float()) - } - if v := req.Get("text"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.text", v.Value()) - } - if v := req.Get("tool_choice"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.tool_choice", v.Value()) - } - if v := req.Get("tools"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.tools", v.Value()) - } - if v := req.Get("top_logprobs"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.top_logprobs", v.Int()) - } - if v := req.Get("top_p"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.top_p", v.Float()) - } - if v := req.Get("truncation"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.truncation", v.String()) - } - if v := req.Get("user"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.user", v.Value()) - } - if v := req.Get("metadata"); v.Exists() { - completed, _ = sjson.SetBytes(completed, "response.metadata", v.Value()) - } - } - // Build response.output using aggregated buffers - outputsWrapper := []byte(`{"arr":[]}`) - type completedOutputItem struct { - index int - raw []byte - } - outputItems := make([]completedOutputItem, 0, len(st.Reasonings)+len(st.MsgItemAdded)+len(st.FuncArgsBuf)) - if len(st.Reasonings) > 0 { - for _, r := range st.Reasonings { - item := []byte(`{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`) - item, _ = sjson.SetBytes(item, "id", r.ReasoningID) - item, _ = sjson.SetBytes(item, "summary.0.text", r.ReasoningData) - 
outputItems = append(outputItems, completedOutputItem{index: r.OutputIndex, raw: item}) - } - } - if len(st.MsgItemAdded) > 0 { - for i := range st.MsgItemAdded { - txt := "" - if b := st.MsgTextBuf[i]; b != nil { - txt = b.String() - } - item := []byte(`{"id":"","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":""}],"role":"assistant"}`) - item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("msg_%s_%d", st.ResponseID, i)) - item, _ = sjson.SetBytes(item, "content.0.text", txt) - outputItems = append(outputItems, completedOutputItem{index: st.MsgOutputIx[i], raw: item}) - } - } - if len(st.FuncArgsBuf) > 0 { - for key := range st.FuncArgsBuf { - args := "" - if b := st.FuncArgsBuf[key]; b != nil { - args = b.String() - } - callID := st.FuncCallIDs[key] - name := st.FuncNames[key] - item := []byte(`{"id":"","type":"function_call","status":"completed","arguments":"","call_id":"","name":""}`) - item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("fc_%s", callID)) - item, _ = sjson.SetBytes(item, "arguments", args) - item, _ = sjson.SetBytes(item, "call_id", callID) - item, _ = sjson.SetBytes(item, "name", name) - outputItems = append(outputItems, completedOutputItem{index: st.FuncOutputIx[key], raw: item}) - } - } - sort.Slice(outputItems, func(i, j int) bool { return outputItems[i].index < outputItems[j].index }) - for _, item := range outputItems { - outputsWrapper, _ = sjson.SetRawBytes(outputsWrapper, "arr.-1", item.raw) - } - if gjson.GetBytes(outputsWrapper, "arr.#").Int() > 0 { - completed, _ = sjson.SetRawBytes(completed, "response.output", []byte(gjson.GetBytes(outputsWrapper, "arr").Raw)) - } - if st.UsageSeen { - completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens", st.PromptTokens) - completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens_details.cached_tokens", st.CachedTokens) - completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens", st.CompletionTokens) 
- if st.ReasoningTokens > 0 { - completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens_details.reasoning_tokens", st.ReasoningTokens) - } - total := st.TotalTokens - if total == 0 { - total = st.PromptTokens + st.CompletionTokens - } - completed, _ = sjson.SetBytes(completed, "response.usage.total_tokens", total) - } - out = append(out, emitRespEvent("response.completed", completed)) + st.CompletionPending = true } return true diff --git a/internal/translator/openai/openai/responses/openai_openai-responses_response_test.go b/internal/translator/openai/openai/responses/openai_openai-responses_response_test.go index 9f3ed3f4..cafcacb7 100644 --- a/internal/translator/openai/openai/responses/openai_openai-responses_response_test.go +++ b/internal/translator/openai/openai/responses/openai_openai-responses_response_test.go @@ -24,6 +24,120 @@ func parseOpenAIResponsesSSEEvent(t *testing.T, chunk []byte) (string, gjson.Res return event, gjson.Parse(dataLine) } +func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_ResponseCompletedWaitsForDone(t *testing.T) { + t.Parallel() + + request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`) + + tests := []struct { + name string + in []string + doneInputIndex int // Index in tt.in where the terminal [DONE] chunk arrives and response.completed must be emitted. + hasUsage bool + inputTokens int64 + outputTokens int64 + totalTokens int64 + }{ + { + // A provider may send finish_reason first and only attach usage in a later chunk (e.g. Vertex AI), + // so response.completed must wait for [DONE] to include that usage. 
+ name: "late usage after finish reason", + in: []string{ + `data: {"id":"resp_late_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_late_usage","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`, + `data: {"id":"resp_late_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\"}"}}]},"finish_reason":"tool_calls"}]}`, + `data: {"id":"resp_late_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[],"usage":{"prompt_tokens":11,"completion_tokens":7,"total_tokens":18}}`, + `data: [DONE]`, + }, + doneInputIndex: 3, + hasUsage: true, + inputTokens: 11, + outputTokens: 7, + totalTokens: 18, + }, + { + // When usage arrives on the same chunk as finish_reason, we still expect a + // single response.completed event and it should remain deferred until [DONE]. 
+ name: "usage on finish reason chunk", + in: []string{ + `data: {"id":"resp_usage_same_chunk","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_usage_same_chunk","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`, + `data: {"id":"resp_usage_same_chunk","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\"}"}}]},"finish_reason":"tool_calls"}],"usage":{"prompt_tokens":13,"completion_tokens":5,"total_tokens":18}}`, + `data: [DONE]`, + }, + doneInputIndex: 2, + hasUsage: true, + inputTokens: 13, + outputTokens: 5, + totalTokens: 18, + }, + { + // An OpenAI-compatible stream from a buggy server might never send usage, so response.completed should + still wait for [DONE] but omit the usage object entirely.
+ name: "no usage chunk", + in: []string{ + `data: {"id":"resp_no_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_no_usage","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`, + `data: {"id":"resp_no_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\"}"}}]},"finish_reason":"tool_calls"}]}`, + `data: [DONE]`, + }, + doneInputIndex: 2, + hasUsage: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + completedCount := 0 + completedInputIndex := -1 + var completedData gjson.Result + + // Reuse converter state across input lines to simulate one streaming response. + var param any + + for i, line := range tt.in { + // One upstream chunk can emit multiple downstream SSE events. + for _, chunk := range ConvertOpenAIChatCompletionsResponseToOpenAIResponses(context.Background(), "model", request, request, []byte(line), &param) { + event, data := parseOpenAIResponsesSSEEvent(t, chunk) + if event != "response.completed" { + continue + } + + completedCount++ + completedInputIndex = i + completedData = data + if i < tt.doneInputIndex { + t.Fatalf("unexpected early response.completed on input index %d", i) + } + } + } + + if completedCount != 1 { + t.Fatalf("expected exactly 1 response.completed event, got %d", completedCount) + } + if completedInputIndex != tt.doneInputIndex { + t.Fatalf("expected response.completed on terminal [DONE] chunk at input index %d, got %d", tt.doneInputIndex, completedInputIndex) + } + + // Missing upstream usage should stay omitted in the final completed event.
+ if !tt.hasUsage { + if completedData.Get("response.usage").Exists() { + t.Fatalf("expected response.completed to omit usage when none was provided, got %s", completedData.Get("response.usage").Raw) + } + return + } + + // When usage is present, the final response.completed event must preserve the usage values. + if got := completedData.Get("response.usage.input_tokens").Int(); got != tt.inputTokens { + t.Fatalf("unexpected response.usage.input_tokens: got %d want %d", got, tt.inputTokens) + } + if got := completedData.Get("response.usage.output_tokens").Int(); got != tt.outputTokens { + t.Fatalf("unexpected response.usage.output_tokens: got %d want %d", got, tt.outputTokens) + } + if got := completedData.Get("response.usage.total_tokens").Int(); got != tt.totalTokens { + t.Fatalf("unexpected response.usage.total_tokens: got %d want %d", got, tt.totalTokens) + } + }) + } +} + func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MultipleToolCallsRemainSeparate(t *testing.T) { in := []string{ `data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_read","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`, @@ -31,6 +145,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MultipleToolCalls `data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":1,"id":"call_glob","type":"function","function":{"name":"glob","arguments":""}}]},"finish_reason":null}]}`, `data: 
{"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":1,"function":{"arguments":"{\"path\":\"C:\\\\repo\",\"pattern\":\"*.{yml,yaml}\"}"}}]},"finish_reason":null}]}`, `data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`, + `data: [DONE]`, } request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`) @@ -131,6 +246,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MultiChoiceToolCa `data: {"id":"resp_multi_choice","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_choice0","type":"function","function":{"name":"glob","arguments":""}}]},"finish_reason":null},{"index":1,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_choice1","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`, `data: {"id":"resp_multi_choice","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"path\":\"C:\\\\repo\",\"pattern\":\"*.go\"}"}}]},"finish_reason":null},{"index":1,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\",\"limit\":20,\"offset\":1}"}}]},"finish_reason":null}]}`, `data: 
{"id":"resp_multi_choice","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"},{"index":1,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`, + `data: [DONE]`, } request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`) @@ -213,6 +329,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MixedMessageAndTo in := []string{ `data: {"id":"resp_mixed","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":"hello","reasoning_content":null,"tool_calls":null},"finish_reason":null},{"index":1,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_choice1","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`, `data: {"id":"resp_mixed","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"stop"},{"index":1,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\",\"limit\":20,\"offset\":1}"}}]},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`, + `data: [DONE]`, } request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`) @@ -261,6 +378,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_FunctionCallDoneA `data: 
{"id":"resp_order","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":1,"id":"call_read","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`, `data: {"id":"resp_order","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":1,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\",\"limit\":20,\"offset\":1}"}}]},"finish_reason":null}]}`, `data: {"id":"resp_order","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`, + `data: [DONE]`, } request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)