mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-12 09:14:15 +00:00
Merge pull request #2531 from jamestut/openai-vertex-token-usage-fix
Fix missing `response.completed.usage` for late-usage OpenAI-compatible streams
This commit is contained in:
@@ -298,6 +298,14 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
|
||||
helps.RecordAPIResponseError(ctx, e.cfg, errScan)
|
||||
reporter.PublishFailure(ctx)
|
||||
out <- cliproxyexecutor.StreamChunk{Err: errScan}
|
||||
} else {
|
||||
// In case the upstream close the stream without a terminal [DONE] marker.
|
||||
// Feed a synthetic done marker through the translator so pending
|
||||
// response.completed events are still emitted exactly once.
|
||||
chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, []byte("data: [DONE]"), ¶m)
|
||||
for i := range chunks {
|
||||
out <- cliproxyexecutor.StreamChunk{Payload: chunks[i]}
|
||||
}
|
||||
}
|
||||
// Ensure we record the request if no usage chunk was ever seen
|
||||
reporter.EnsurePublished(ctx)
|
||||
|
||||
@@ -20,12 +20,14 @@ type oaiToResponsesStateReasoning struct {
|
||||
OutputIndex int
|
||||
}
|
||||
type oaiToResponsesState struct {
|
||||
Seq int
|
||||
ResponseID string
|
||||
Created int64
|
||||
Started bool
|
||||
ReasoningID string
|
||||
ReasoningIndex int
|
||||
Seq int
|
||||
ResponseID string
|
||||
Created int64
|
||||
Started bool
|
||||
CompletionPending bool
|
||||
CompletedEmitted bool
|
||||
ReasoningID string
|
||||
ReasoningIndex int
|
||||
// aggregation buffers for response.output
|
||||
// Per-output message text buffers by index
|
||||
MsgTextBuf map[int]*strings.Builder
|
||||
@@ -60,6 +62,141 @@ func emitRespEvent(event string, payload []byte) []byte {
|
||||
return translatorcommon.SSEEventData(event, payload)
|
||||
}
|
||||
|
||||
func buildResponsesCompletedEvent(st *oaiToResponsesState, requestRawJSON []byte, nextSeq func() int) []byte {
|
||||
completed := []byte(`{"type":"response.completed","sequence_number":0,"response":{"id":"","object":"response","created_at":0,"status":"completed","background":false,"error":null}}`)
|
||||
completed, _ = sjson.SetBytes(completed, "sequence_number", nextSeq())
|
||||
completed, _ = sjson.SetBytes(completed, "response.id", st.ResponseID)
|
||||
completed, _ = sjson.SetBytes(completed, "response.created_at", st.Created)
|
||||
// Inject original request fields into response as per docs/response.completed.json
|
||||
if requestRawJSON != nil {
|
||||
req := gjson.ParseBytes(requestRawJSON)
|
||||
if v := req.Get("instructions"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.instructions", v.String())
|
||||
}
|
||||
if v := req.Get("max_output_tokens"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.max_output_tokens", v.Int())
|
||||
}
|
||||
if v := req.Get("max_tool_calls"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.max_tool_calls", v.Int())
|
||||
}
|
||||
if v := req.Get("model"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.model", v.String())
|
||||
}
|
||||
if v := req.Get("parallel_tool_calls"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.parallel_tool_calls", v.Bool())
|
||||
}
|
||||
if v := req.Get("previous_response_id"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.previous_response_id", v.String())
|
||||
}
|
||||
if v := req.Get("prompt_cache_key"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.prompt_cache_key", v.String())
|
||||
}
|
||||
if v := req.Get("reasoning"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.reasoning", v.Value())
|
||||
}
|
||||
if v := req.Get("safety_identifier"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.safety_identifier", v.String())
|
||||
}
|
||||
if v := req.Get("service_tier"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.service_tier", v.String())
|
||||
}
|
||||
if v := req.Get("store"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.store", v.Bool())
|
||||
}
|
||||
if v := req.Get("temperature"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.temperature", v.Float())
|
||||
}
|
||||
if v := req.Get("text"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.text", v.Value())
|
||||
}
|
||||
if v := req.Get("tool_choice"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.tool_choice", v.Value())
|
||||
}
|
||||
if v := req.Get("tools"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.tools", v.Value())
|
||||
}
|
||||
if v := req.Get("top_logprobs"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.top_logprobs", v.Int())
|
||||
}
|
||||
if v := req.Get("top_p"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.top_p", v.Float())
|
||||
}
|
||||
if v := req.Get("truncation"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.truncation", v.String())
|
||||
}
|
||||
if v := req.Get("user"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.user", v.Value())
|
||||
}
|
||||
if v := req.Get("metadata"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.metadata", v.Value())
|
||||
}
|
||||
}
|
||||
|
||||
outputsWrapper := []byte(`{"arr":[]}`)
|
||||
type completedOutputItem struct {
|
||||
index int
|
||||
raw []byte
|
||||
}
|
||||
outputItems := make([]completedOutputItem, 0, len(st.Reasonings)+len(st.MsgItemAdded)+len(st.FuncArgsBuf))
|
||||
if len(st.Reasonings) > 0 {
|
||||
for _, r := range st.Reasonings {
|
||||
item := []byte(`{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`)
|
||||
item, _ = sjson.SetBytes(item, "id", r.ReasoningID)
|
||||
item, _ = sjson.SetBytes(item, "summary.0.text", r.ReasoningData)
|
||||
outputItems = append(outputItems, completedOutputItem{index: r.OutputIndex, raw: item})
|
||||
}
|
||||
}
|
||||
if len(st.MsgItemAdded) > 0 {
|
||||
for i := range st.MsgItemAdded {
|
||||
txt := ""
|
||||
if b := st.MsgTextBuf[i]; b != nil {
|
||||
txt = b.String()
|
||||
}
|
||||
item := []byte(`{"id":"","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":""}],"role":"assistant"}`)
|
||||
item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("msg_%s_%d", st.ResponseID, i))
|
||||
item, _ = sjson.SetBytes(item, "content.0.text", txt)
|
||||
outputItems = append(outputItems, completedOutputItem{index: st.MsgOutputIx[i], raw: item})
|
||||
}
|
||||
}
|
||||
if len(st.FuncArgsBuf) > 0 {
|
||||
for key := range st.FuncArgsBuf {
|
||||
args := ""
|
||||
if b := st.FuncArgsBuf[key]; b != nil {
|
||||
args = b.String()
|
||||
}
|
||||
callID := st.FuncCallIDs[key]
|
||||
name := st.FuncNames[key]
|
||||
item := []byte(`{"id":"","type":"function_call","status":"completed","arguments":"","call_id":"","name":""}`)
|
||||
item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("fc_%s", callID))
|
||||
item, _ = sjson.SetBytes(item, "arguments", args)
|
||||
item, _ = sjson.SetBytes(item, "call_id", callID)
|
||||
item, _ = sjson.SetBytes(item, "name", name)
|
||||
outputItems = append(outputItems, completedOutputItem{index: st.FuncOutputIx[key], raw: item})
|
||||
}
|
||||
}
|
||||
sort.Slice(outputItems, func(i, j int) bool { return outputItems[i].index < outputItems[j].index })
|
||||
for _, item := range outputItems {
|
||||
outputsWrapper, _ = sjson.SetRawBytes(outputsWrapper, "arr.-1", item.raw)
|
||||
}
|
||||
if gjson.GetBytes(outputsWrapper, "arr.#").Int() > 0 {
|
||||
completed, _ = sjson.SetRawBytes(completed, "response.output", []byte(gjson.GetBytes(outputsWrapper, "arr").Raw))
|
||||
}
|
||||
if st.UsageSeen {
|
||||
completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens", st.PromptTokens)
|
||||
completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens_details.cached_tokens", st.CachedTokens)
|
||||
completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens", st.CompletionTokens)
|
||||
if st.ReasoningTokens > 0 {
|
||||
completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens_details.reasoning_tokens", st.ReasoningTokens)
|
||||
}
|
||||
total := st.TotalTokens
|
||||
if total == 0 {
|
||||
total = st.PromptTokens + st.CompletionTokens
|
||||
}
|
||||
completed, _ = sjson.SetBytes(completed, "response.usage.total_tokens", total)
|
||||
}
|
||||
return emitRespEvent("response.completed", completed)
|
||||
}
|
||||
|
||||
// ConvertOpenAIChatCompletionsResponseToOpenAIResponses converts OpenAI Chat Completions streaming chunks
|
||||
// to OpenAI Responses SSE events (response.*).
|
||||
func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) [][]byte {
|
||||
@@ -90,6 +227,10 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
|
||||
return [][]byte{}
|
||||
}
|
||||
if bytes.Equal(rawJSON, []byte("[DONE]")) {
|
||||
if st.CompletionPending && !st.CompletedEmitted {
|
||||
st.CompletedEmitted = true
|
||||
return [][]byte{buildResponsesCompletedEvent(st, requestRawJSON, func() int { st.Seq++; return st.Seq })}
|
||||
}
|
||||
return [][]byte{}
|
||||
}
|
||||
|
||||
@@ -165,6 +306,8 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
|
||||
st.TotalTokens = 0
|
||||
st.ReasoningTokens = 0
|
||||
st.UsageSeen = false
|
||||
st.CompletionPending = false
|
||||
st.CompletedEmitted = false
|
||||
// response.created
|
||||
created := []byte(`{"type":"response.created","sequence_number":0,"response":{"id":"","object":"response","created_at":0,"status":"in_progress","background":false,"error":null,"output":[]}}`)
|
||||
created, _ = sjson.SetBytes(created, "sequence_number", nextSeq())
|
||||
@@ -374,8 +517,9 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
|
||||
}
|
||||
}
|
||||
|
||||
// finish_reason triggers finalization, including text done/content done/item done,
|
||||
// reasoning done/part.done, function args done/item done, and completed
|
||||
// finish_reason triggers item-level finalization. response.completed is
|
||||
// deferred until the terminal [DONE] marker so late usage-only chunks can
|
||||
// still populate response.usage.
|
||||
if fr := choice.Get("finish_reason"); fr.Exists() && fr.String() != "" {
|
||||
// Emit message done events for all indices that started a message
|
||||
if len(st.MsgItemAdded) > 0 {
|
||||
@@ -464,138 +608,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx context.Context,
|
||||
st.FuncArgsDone[key] = true
|
||||
}
|
||||
}
|
||||
completed := []byte(`{"type":"response.completed","sequence_number":0,"response":{"id":"","object":"response","created_at":0,"status":"completed","background":false,"error":null}}`)
|
||||
completed, _ = sjson.SetBytes(completed, "sequence_number", nextSeq())
|
||||
completed, _ = sjson.SetBytes(completed, "response.id", st.ResponseID)
|
||||
completed, _ = sjson.SetBytes(completed, "response.created_at", st.Created)
|
||||
// Inject original request fields into response as per docs/response.completed.json
|
||||
if requestRawJSON != nil {
|
||||
req := gjson.ParseBytes(requestRawJSON)
|
||||
if v := req.Get("instructions"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.instructions", v.String())
|
||||
}
|
||||
if v := req.Get("max_output_tokens"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.max_output_tokens", v.Int())
|
||||
}
|
||||
if v := req.Get("max_tool_calls"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.max_tool_calls", v.Int())
|
||||
}
|
||||
if v := req.Get("model"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.model", v.String())
|
||||
}
|
||||
if v := req.Get("parallel_tool_calls"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.parallel_tool_calls", v.Bool())
|
||||
}
|
||||
if v := req.Get("previous_response_id"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.previous_response_id", v.String())
|
||||
}
|
||||
if v := req.Get("prompt_cache_key"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.prompt_cache_key", v.String())
|
||||
}
|
||||
if v := req.Get("reasoning"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.reasoning", v.Value())
|
||||
}
|
||||
if v := req.Get("safety_identifier"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.safety_identifier", v.String())
|
||||
}
|
||||
if v := req.Get("service_tier"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.service_tier", v.String())
|
||||
}
|
||||
if v := req.Get("store"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.store", v.Bool())
|
||||
}
|
||||
if v := req.Get("temperature"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.temperature", v.Float())
|
||||
}
|
||||
if v := req.Get("text"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.text", v.Value())
|
||||
}
|
||||
if v := req.Get("tool_choice"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.tool_choice", v.Value())
|
||||
}
|
||||
if v := req.Get("tools"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.tools", v.Value())
|
||||
}
|
||||
if v := req.Get("top_logprobs"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.top_logprobs", v.Int())
|
||||
}
|
||||
if v := req.Get("top_p"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.top_p", v.Float())
|
||||
}
|
||||
if v := req.Get("truncation"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.truncation", v.String())
|
||||
}
|
||||
if v := req.Get("user"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.user", v.Value())
|
||||
}
|
||||
if v := req.Get("metadata"); v.Exists() {
|
||||
completed, _ = sjson.SetBytes(completed, "response.metadata", v.Value())
|
||||
}
|
||||
}
|
||||
// Build response.output using aggregated buffers
|
||||
outputsWrapper := []byte(`{"arr":[]}`)
|
||||
type completedOutputItem struct {
|
||||
index int
|
||||
raw []byte
|
||||
}
|
||||
outputItems := make([]completedOutputItem, 0, len(st.Reasonings)+len(st.MsgItemAdded)+len(st.FuncArgsBuf))
|
||||
if len(st.Reasonings) > 0 {
|
||||
for _, r := range st.Reasonings {
|
||||
item := []byte(`{"id":"","type":"reasoning","summary":[{"type":"summary_text","text":""}]}`)
|
||||
item, _ = sjson.SetBytes(item, "id", r.ReasoningID)
|
||||
item, _ = sjson.SetBytes(item, "summary.0.text", r.ReasoningData)
|
||||
outputItems = append(outputItems, completedOutputItem{index: r.OutputIndex, raw: item})
|
||||
}
|
||||
}
|
||||
if len(st.MsgItemAdded) > 0 {
|
||||
for i := range st.MsgItemAdded {
|
||||
txt := ""
|
||||
if b := st.MsgTextBuf[i]; b != nil {
|
||||
txt = b.String()
|
||||
}
|
||||
item := []byte(`{"id":"","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":""}],"role":"assistant"}`)
|
||||
item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("msg_%s_%d", st.ResponseID, i))
|
||||
item, _ = sjson.SetBytes(item, "content.0.text", txt)
|
||||
outputItems = append(outputItems, completedOutputItem{index: st.MsgOutputIx[i], raw: item})
|
||||
}
|
||||
}
|
||||
if len(st.FuncArgsBuf) > 0 {
|
||||
for key := range st.FuncArgsBuf {
|
||||
args := ""
|
||||
if b := st.FuncArgsBuf[key]; b != nil {
|
||||
args = b.String()
|
||||
}
|
||||
callID := st.FuncCallIDs[key]
|
||||
name := st.FuncNames[key]
|
||||
item := []byte(`{"id":"","type":"function_call","status":"completed","arguments":"","call_id":"","name":""}`)
|
||||
item, _ = sjson.SetBytes(item, "id", fmt.Sprintf("fc_%s", callID))
|
||||
item, _ = sjson.SetBytes(item, "arguments", args)
|
||||
item, _ = sjson.SetBytes(item, "call_id", callID)
|
||||
item, _ = sjson.SetBytes(item, "name", name)
|
||||
outputItems = append(outputItems, completedOutputItem{index: st.FuncOutputIx[key], raw: item})
|
||||
}
|
||||
}
|
||||
sort.Slice(outputItems, func(i, j int) bool { return outputItems[i].index < outputItems[j].index })
|
||||
for _, item := range outputItems {
|
||||
outputsWrapper, _ = sjson.SetRawBytes(outputsWrapper, "arr.-1", item.raw)
|
||||
}
|
||||
if gjson.GetBytes(outputsWrapper, "arr.#").Int() > 0 {
|
||||
completed, _ = sjson.SetRawBytes(completed, "response.output", []byte(gjson.GetBytes(outputsWrapper, "arr").Raw))
|
||||
}
|
||||
if st.UsageSeen {
|
||||
completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens", st.PromptTokens)
|
||||
completed, _ = sjson.SetBytes(completed, "response.usage.input_tokens_details.cached_tokens", st.CachedTokens)
|
||||
completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens", st.CompletionTokens)
|
||||
if st.ReasoningTokens > 0 {
|
||||
completed, _ = sjson.SetBytes(completed, "response.usage.output_tokens_details.reasoning_tokens", st.ReasoningTokens)
|
||||
}
|
||||
total := st.TotalTokens
|
||||
if total == 0 {
|
||||
total = st.PromptTokens + st.CompletionTokens
|
||||
}
|
||||
completed, _ = sjson.SetBytes(completed, "response.usage.total_tokens", total)
|
||||
}
|
||||
out = append(out, emitRespEvent("response.completed", completed))
|
||||
st.CompletionPending = true
|
||||
}
|
||||
|
||||
return true
|
||||
|
||||
@@ -24,6 +24,120 @@ func parseOpenAIResponsesSSEEvent(t *testing.T, chunk []byte) (string, gjson.Res
|
||||
return event, gjson.Parse(dataLine)
|
||||
}
|
||||
|
||||
func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_ResponseCompletedWaitsForDone(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
in []string
|
||||
doneInputIndex int // Index in tt.in where the terminal [DONE] chunk arrives and response.completed must be emitted.
|
||||
hasUsage bool
|
||||
inputTokens int64
|
||||
outputTokens int64
|
||||
totalTokens int64
|
||||
}{
|
||||
{
|
||||
// A provider may send finish_reason first and only attach usage in a later chunk (e.g. Vertex AI),
|
||||
// so response.completed must wait for [DONE] to include that usage.
|
||||
name: "late usage after finish reason",
|
||||
in: []string{
|
||||
`data: {"id":"resp_late_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_late_usage","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
|
||||
`data: {"id":"resp_late_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\"}"}}]},"finish_reason":"tool_calls"}]}`,
|
||||
`data: {"id":"resp_late_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[],"usage":{"prompt_tokens":11,"completion_tokens":7,"total_tokens":18}}`,
|
||||
`data: [DONE]`,
|
||||
},
|
||||
doneInputIndex: 3,
|
||||
hasUsage: true,
|
||||
inputTokens: 11,
|
||||
outputTokens: 7,
|
||||
totalTokens: 18,
|
||||
},
|
||||
{
|
||||
// When usage arrives on the same chunk as finish_reason, we still expect a
|
||||
// single response.completed event and it should remain deferred until [DONE].
|
||||
name: "usage on finish reason chunk",
|
||||
in: []string{
|
||||
`data: {"id":"resp_usage_same_chunk","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_usage_same_chunk","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
|
||||
`data: {"id":"resp_usage_same_chunk","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\"}"}}]},"finish_reason":"tool_calls"}],"usage":{"prompt_tokens":13,"completion_tokens":5,"total_tokens":18}}`,
|
||||
`data: [DONE]`,
|
||||
},
|
||||
doneInputIndex: 2,
|
||||
hasUsage: true,
|
||||
inputTokens: 13,
|
||||
outputTokens: 5,
|
||||
totalTokens: 18,
|
||||
},
|
||||
{
|
||||
// An OpenAI-compatible streams from a buggy server might never send usage, so response.completed should
|
||||
// still wait for [DONE] but omit the usage object entirely.
|
||||
name: "no usage chunk",
|
||||
in: []string{
|
||||
`data: {"id":"resp_no_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_no_usage","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
|
||||
`data: {"id":"resp_no_usage","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\"}"}}]},"finish_reason":"tool_calls"}]}`,
|
||||
`data: [DONE]`,
|
||||
},
|
||||
doneInputIndex: 2,
|
||||
hasUsage: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
completedCount := 0
|
||||
completedInputIndex := -1
|
||||
var completedData gjson.Result
|
||||
|
||||
// Reuse converter state across input lines to simulate one streaming response.
|
||||
var param any
|
||||
|
||||
for i, line := range tt.in {
|
||||
// One upstream chunk can emit multiple downstream SSE events.
|
||||
for _, chunk := range ConvertOpenAIChatCompletionsResponseToOpenAIResponses(context.Background(), "model", request, request, []byte(line), ¶m) {
|
||||
event, data := parseOpenAIResponsesSSEEvent(t, chunk)
|
||||
if event != "response.completed" {
|
||||
continue
|
||||
}
|
||||
|
||||
completedCount++
|
||||
completedInputIndex = i
|
||||
completedData = data
|
||||
if i < tt.doneInputIndex {
|
||||
t.Fatalf("unexpected early response.completed on input index %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if completedCount != 1 {
|
||||
t.Fatalf("expected exactly 1 response.completed event, got %d", completedCount)
|
||||
}
|
||||
if completedInputIndex != tt.doneInputIndex {
|
||||
t.Fatalf("expected response.completed on terminal [DONE] chunk at input index %d, got %d", tt.doneInputIndex, completedInputIndex)
|
||||
}
|
||||
|
||||
// Missing upstream usage should stay omitted in the final completed event.
|
||||
if !tt.hasUsage {
|
||||
if completedData.Get("response.usage").Exists() {
|
||||
t.Fatalf("expected response.completed to omit usage when none was provided, got %s", completedData.Get("response.usage").Raw)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// When usage is present, the final response.completed event must preserve the usage values.
|
||||
if got := completedData.Get("response.usage.input_tokens").Int(); got != tt.inputTokens {
|
||||
t.Fatalf("unexpected response.usage.input_tokens: got %d want %d", got, tt.inputTokens)
|
||||
}
|
||||
if got := completedData.Get("response.usage.output_tokens").Int(); got != tt.outputTokens {
|
||||
t.Fatalf("unexpected response.usage.output_tokens: got %d want %d", got, tt.outputTokens)
|
||||
}
|
||||
if got := completedData.Get("response.usage.total_tokens").Int(); got != tt.totalTokens {
|
||||
t.Fatalf("unexpected response.usage.total_tokens: got %d want %d", got, tt.totalTokens)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MultipleToolCallsRemainSeparate(t *testing.T) {
|
||||
in := []string{
|
||||
`data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_read","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
|
||||
@@ -31,6 +145,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MultipleToolCalls
|
||||
`data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":1,"id":"call_glob","type":"function","function":{"name":"glob","arguments":""}}]},"finish_reason":null}]}`,
|
||||
`data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":1,"function":{"arguments":"{\"path\":\"C:\\\\repo\",\"pattern\":\"*.{yml,yaml}\"}"}}]},"finish_reason":null}]}`,
|
||||
`data: {"id":"resp_test","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`,
|
||||
`data: [DONE]`,
|
||||
}
|
||||
|
||||
request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)
|
||||
@@ -131,6 +246,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MultiChoiceToolCa
|
||||
`data: {"id":"resp_multi_choice","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_choice0","type":"function","function":{"name":"glob","arguments":""}}]},"finish_reason":null},{"index":1,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_choice1","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
|
||||
`data: {"id":"resp_multi_choice","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"path\":\"C:\\\\repo\",\"pattern\":\"*.go\"}"}}]},"finish_reason":null},{"index":1,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\",\"limit\":20,\"offset\":1}"}}]},"finish_reason":null}]}`,
|
||||
`data: {"id":"resp_multi_choice","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"},{"index":1,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`,
|
||||
`data: [DONE]`,
|
||||
}
|
||||
|
||||
request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)
|
||||
@@ -213,6 +329,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_MixedMessageAndTo
|
||||
in := []string{
|
||||
`data: {"id":"resp_mixed","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":"hello","reasoning_content":null,"tool_calls":null},"finish_reason":null},{"index":1,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":0,"id":"call_choice1","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
|
||||
`data: {"id":"resp_mixed","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"stop"},{"index":1,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":0,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\",\"limit\":20,\"offset\":1}"}}]},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`,
|
||||
`data: [DONE]`,
|
||||
}
|
||||
|
||||
request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)
|
||||
@@ -261,6 +378,7 @@ func TestConvertOpenAIChatCompletionsResponseToOpenAIResponses_FunctionCallDoneA
|
||||
`data: {"id":"resp_order","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":"assistant","content":null,"reasoning_content":null,"tool_calls":[{"index":1,"id":"call_read","type":"function","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}`,
|
||||
`data: {"id":"resp_order","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":[{"index":1,"function":{"arguments":"{\"filePath\":\"C:\\\\repo\\\\README.md\",\"limit\":20,\"offset\":1}"}}]},"finish_reason":null}]}`,
|
||||
`data: {"id":"resp_order","object":"chat.completion.chunk","created":1773896263,"model":"model","choices":[{"index":0,"delta":{"role":null,"content":null,"reasoning_content":null,"tool_calls":null},"finish_reason":"tool_calls"}],"usage":{"completion_tokens":10,"total_tokens":20,"prompt_tokens":10}}`,
|
||||
`data: [DONE]`,
|
||||
}
|
||||
|
||||
request := []byte(`{"model":"gpt-5.4","tool_choice":"auto","parallel_tool_calls":true}`)
|
||||
|
||||
Reference in New Issue
Block a user