From c8cee6a20971a3144433c0bb8f02b21efec7ee32 Mon Sep 17 00:00:00 2001 From: Muran-prog Date: Sat, 14 Mar 2026 21:01:01 +0200 Subject: [PATCH 1/2] fix: skip empty assistant message in tool call translation (#2132) When assistant has tool_calls but no text content, the translator emitted an empty message into the Responses API input array before function_call items. The API then couldn't match function_call_output to its function_call by call_id, returning: No tool output found for function call ... Only emit assistant messages that have content parts. Tool-call-only messages now produce function_call items directly. Added 9 tests for tool calling translation covering single/parallel calls, multi-turn conversations, name shortening, empty content edge cases, and call_id integrity. --- .../chat-completions/codex_openai_request.go | 7 +- .../codex_openai_request_test.go | 641 ++++++++++++++++++ 2 files changed, 647 insertions(+), 1 deletion(-) create mode 100644 internal/translator/codex/openai/chat-completions/codex_openai_request_test.go diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_request.go b/internal/translator/codex/openai/chat-completions/codex_openai_request.go index 1ea9ca4b..6941ec46 100644 --- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go +++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go @@ -197,7 +197,12 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b } } - out, _ = sjson.SetRaw(out, "input.-1", msg) + // Don't emit empty assistant messages when only tool_calls + // are present — Responses API needs function_call items + // directly, otherwise call_id matching fails (#2132). + if role != "assistant" || len(gjson.Get(msg, "content").Array()) > 0 { + out, _ = sjson.SetRaw(out, "input.-1", msg) + } // Handle tool calls for assistant messages as separate top-level objects if role == "assistant" { diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go b/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go new file mode 100644 index 00000000..9ce52e59 --- /dev/null +++ b/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go @@ -0,0 +1,641 @@ +package chat_completions + +import ( + "strings" + "testing" + + "github.com/tidwall/gjson" +) + +// Basic tool-call: system + user + assistant(tool_calls, no content) + tool result. +// Expects developer msg + user msg + function_call + function_call_output. +// No empty assistant message should appear between user and function_call. +func TestToolCallSimple(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the weather in Paris?"}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"city\":\"Paris\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_1", + "content": "sunny, 22C" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather for a city", + "parameters": {"type": "object", "properties": {"city": {"type": "string"}}} + } + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + if len(items) != 4 { + t.Fatalf("expected 4 input items, got %d: %s", len(items), gjson.Get(result, "input").Raw) + } + + // system -> developer + if items[0].Get("type").String() != "message" { + t.Errorf("item 0: expected type 'message', got '%s'", items[0].Get("type").String()) + } + if items[0].Get("role").String() != "developer" { + t.Errorf("item 0: expected role 'developer', got '%s'", items[0].Get("role").String()) + } + + // user + if items[1].Get("type").String() != "message" { + t.Errorf("item 1: expected type 'message', got '%s'", items[1].Get("type").String()) + } + if items[1].Get("role").String() != "user" { + t.Errorf("item 1: expected role 'user', got '%s'", items[1].Get("role").String()) + } + + // function_call, not an empty assistant msg + if items[2].Get("type").String() != "function_call" { + t.Errorf("item 2: expected type 'function_call', got '%s'", items[2].Get("type").String()) + } + if items[2].Get("call_id").String() != "call_1" { + t.Errorf("item 2: expected call_id 'call_1', got '%s'", items[2].Get("call_id").String()) + } + if items[2].Get("name").String() != "get_weather" { + t.Errorf("item 2: expected name 'get_weather', got '%s'", items[2].Get("name").String()) + } + if items[2].Get("arguments").String() != `{"city":"Paris"}` { + t.Errorf("item 2: unexpected arguments: %s", items[2].Get("arguments").String()) + } + + // function_call_output + if items[3].Get("type").String() != "function_call_output" { + t.Errorf("item 3: expected type 'function_call_output', got '%s'", items[3].Get("type").String()) + } + if items[3].Get("call_id").String() != "call_1" { + t.Errorf("item 3: expected call_id 'call_1', got '%s'", items[3].Get("call_id").String()) + } + if items[3].Get("output").String() != "sunny, 22C" { + t.Errorf("item 3: expected output 'sunny, 22C', got '%s'", items[3].Get("output").String()) + } +} + +// Assistant has both text content and tool_calls — the message should +// be emitted (non-empty content), followed by function_call items. +func TestToolCallWithContent(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "What is the weather?"}, + { + "role": "assistant", + "content": "Let me check the weather for you.", + "tool_calls": [ + { + "id": "call_abc", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_abc", + "content": "rainy, 15C" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather", + "parameters": {"type": "object", "properties": {}} + } + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + // user + assistant(with content) + function_call + function_call_output + if len(items) != 4 { + t.Fatalf("expected 4 input items, got %d: %s", len(items), gjson.Get(result, "input").Raw) + } + + if items[0].Get("role").String() != "user" { + t.Errorf("item 0: expected role 'user', got '%s'", items[0].Get("role").String()) + } + + // assistant with content — should be kept + if items[1].Get("type").String() != "message" { + t.Errorf("item 1: expected type 'message', got '%s'", items[1].Get("type").String()) + } + if items[1].Get("role").String() != "assistant" { + t.Errorf("item 1: expected role 'assistant', got '%s'", items[1].Get("role").String()) + } + contentParts := items[1].Get("content").Array() + if len(contentParts) == 0 { + t.Errorf("item 1: assistant message should have content parts") + } + + if items[2].Get("type").String() != "function_call" { + t.Errorf("item 2: expected type 'function_call', got '%s'", items[2].Get("type").String()) + } + if items[2].Get("call_id").String() != "call_abc" { + t.Errorf("item 2: expected call_id 'call_abc', got '%s'", items[2].Get("call_id").String()) + } + + if items[3].Get("type").String() != "function_call_output" { + t.Errorf("item 3: expected type 'function_call_output', got '%s'", items[3].Get("type").String()) + } + if items[3].Get("call_id").String() != "call_abc" { + t.Errorf("item 3: expected call_id 'call_abc', got '%s'", items[3].Get("call_id").String()) + } +} + +// Parallel tool calls: assistant invokes 3 tools at once, all call_ids +// and outputs must be translated and paired correctly. +func TestMultipleToolCalls(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Compare weather in Paris, London and Tokyo"}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_paris", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"city\":\"Paris\"}" + } + }, + { + "id": "call_london", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"city\":\"London\"}" + } + }, + { + "id": "call_tokyo", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"city\":\"Tokyo\"}" + } + } + ] + }, + {"role": "tool", "tool_call_id": "call_paris", "content": "sunny, 22C"}, + {"role": "tool", "tool_call_id": "call_london", "content": "cloudy, 14C"}, + {"role": "tool", "tool_call_id": "call_tokyo", "content": "humid, 28C"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather", + "parameters": {"type": "object", "properties": {"city": {"type": "string"}}} + } + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + // user + 3 function_call + 3 function_call_output = 7 + if len(items) != 7 { + t.Fatalf("expected 7 input items, got %d: %s", len(items), gjson.Get(result, "input").Raw) + } + + if items[0].Get("role").String() != "user" { + t.Errorf("item 0: expected role 'user', got '%s'", items[0].Get("role").String()) + } + + expectedCallIDs := []string{"call_paris", "call_london", "call_tokyo"} + for i, expectedID := range expectedCallIDs { + idx := i + 1 + if items[idx].Get("type").String() != "function_call" { + t.Errorf("item %d: expected type 'function_call', got '%s'", idx, items[idx].Get("type").String()) + } + if items[idx].Get("call_id").String() != expectedID { + t.Errorf("item %d: expected call_id '%s', got '%s'", idx, expectedID, items[idx].Get("call_id").String()) + } + } + + expectedOutputs := []string{"sunny, 22C", "cloudy, 14C", "humid, 28C"} + for i, expectedOutput := range expectedOutputs { + idx := i + 4 + if items[idx].Get("type").String() != "function_call_output" { + t.Errorf("item %d: expected type 'function_call_output', got '%s'", idx, items[idx].Get("type").String()) + } + if items[idx].Get("call_id").String() != expectedCallIDs[i] { + t.Errorf("item %d: expected call_id '%s', got '%s'", idx, expectedCallIDs[i], items[idx].Get("call_id").String()) + } + if items[idx].Get("output").String() != expectedOutput { + t.Errorf("item %d: expected output '%s', got '%s'", idx, expectedOutput, items[idx].Get("output").String()) + } + } +} + +// Regression test for #2132: tool-call-only assistant messages (content:null) +// must not produce an empty message item in the translated output. +func TestNoSpuriousEmptyAssistantMessage(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Call a tool"}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_x", + "type": "function", + "function": {"name": "do_thing", "arguments": "{}"} + } + ] + }, + {"role": "tool", "tool_call_id": "call_x", "content": "done"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "do_thing", + "description": "Do a thing", + "parameters": {"type": "object", "properties": {}} + } + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + + for i, item := range items { + typ := item.Get("type").String() + role := item.Get("role").String() + if typ == "message" && role == "assistant" { + contentArr := item.Get("content").Array() + if len(contentArr) == 0 { + t.Errorf("item %d: empty assistant message breaks call_id matching. item: %s", i, item.Raw) + } + } + } + + // should be exactly: user + function_call + function_call_output + if len(items) != 3 { + t.Fatalf("expected 3 input items (user + function_call + function_call_output), got %d: %s", len(items), gjson.Get(result, "input").Raw) + } + if items[0].Get("type").String() != "message" || items[0].Get("role").String() != "user" { + t.Errorf("item 0: expected user message") + } + if items[1].Get("type").String() != "function_call" { + t.Errorf("item 1: expected function_call, got %s", items[1].Get("type").String()) + } + if items[2].Get("type").String() != "function_call_output" { + t.Errorf("item 2: expected function_call_output, got %s", items[2].Get("type").String()) + } +} + +// Two rounds of tool calling in one conversation, with a text reply in between. +func TestMultiTurnToolCalling(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Weather in Paris?"}, + { + "role": "assistant", + "content": null, + "tool_calls": [{"id": "call_r1", "type": "function", "function": {"name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}}] + }, + {"role": "tool", "tool_call_id": "call_r1", "content": "sunny"}, + {"role": "assistant", "content": "It is sunny in Paris."}, + {"role": "user", "content": "And London?"}, + { + "role": "assistant", + "content": null, + "tool_calls": [{"id": "call_r2", "type": "function", "function": {"name": "get_weather", "arguments": "{\"city\":\"London\"}"}}] + }, + {"role": "tool", "tool_call_id": "call_r2", "content": "rainy"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather", + "parameters": {"type": "object", "properties": {"city": {"type": "string"}}} + } + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + // user, func_call(r1), func_output(r1), assistant text, user, func_call(r2), func_output(r2) + if len(items) != 7 { + t.Fatalf("expected 7 input items, got %d: %s", len(items), gjson.Get(result, "input").Raw) + } + + for i, item := range items { + if item.Get("type").String() == "message" && item.Get("role").String() == "assistant" { + if len(item.Get("content").Array()) == 0 { + t.Errorf("item %d: unexpected empty assistant message", i) + } + } + } + + // round 1 + if items[1].Get("type").String() != "function_call" { + t.Errorf("item 1: expected function_call, got %s", items[1].Get("type").String()) + } + if items[1].Get("call_id").String() != "call_r1" { + t.Errorf("item 1: expected call_id 'call_r1', got '%s'", items[1].Get("call_id").String()) + } + if items[2].Get("type").String() != "function_call_output" { + t.Errorf("item 2: expected function_call_output, got %s", items[2].Get("type").String()) + } + + // text reply between rounds + if items[3].Get("type").String() != "message" || items[3].Get("role").String() != "assistant" { + t.Errorf("item 3: expected assistant message, got type=%s role=%s", items[3].Get("type").String(), items[3].Get("role").String()) + } + + // round 2 + if items[5].Get("type").String() != "function_call" { + t.Errorf("item 5: expected function_call, got %s", items[5].Get("type").String()) + } + if items[5].Get("call_id").String() != "call_r2" { + t.Errorf("item 5: expected call_id 'call_r2', got '%s'", items[5].Get("call_id").String()) + } + if items[6].Get("type").String() != "function_call_output" { + t.Errorf("item 6: expected function_call_output, got %s", items[6].Get("type").String()) + } +} + +// Tool names over 64 chars get shortened, call_id stays the same. +func TestToolNameShortening(t *testing.T) { + longName := "a_very_long_tool_name_that_exceeds_sixty_four_characters_limit_here_test" + if len(longName) <= 64 { + t.Fatalf("test setup error: name must be > 64 chars, got %d", len(longName)) + } + + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Do it"}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_long", + "type": "function", + "function": { + "name": "` + longName + `", + "arguments": "{}" + } + } + ] + }, + {"role": "tool", "tool_call_id": "call_long", "content": "ok"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "` + longName + `", + "description": "A tool with a very long name", + "parameters": {"type": "object", "properties": {}} + } + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + + // find function_call + var funcCallItem gjson.Result + for _, item := range items { + if item.Get("type").String() == "function_call" { + funcCallItem = item + break + } + } + + if !funcCallItem.Exists() { + t.Fatal("no function_call item found in output") + } + + // call_id unchanged + if funcCallItem.Get("call_id").String() != "call_long" { + t.Errorf("call_id changed: expected 'call_long', got '%s'", funcCallItem.Get("call_id").String()) + } + + // name must be truncated + translatedName := funcCallItem.Get("name").String() + if translatedName == longName { + t.Errorf("tool name was NOT shortened: still '%s'", translatedName) + } + if len(translatedName) > 64 { + t.Errorf("shortened name still > 64 chars: len=%d name='%s'", len(translatedName), translatedName) + } +} + +// content:"" (empty string, not null) should be treated the same as null. +func TestEmptyStringContent(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Do something"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_empty", + "type": "function", + "function": {"name": "action", "arguments": "{}"} + } + ] + }, + {"role": "tool", "tool_call_id": "call_empty", "content": "result"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "action", + "description": "An action", + "parameters": {"type": "object", "properties": {}} + } + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + + for i, item := range items { + if item.Get("type").String() == "message" && item.Get("role").String() == "assistant" { + if len(item.Get("content").Array()) == 0 { + t.Errorf("item %d: empty assistant message from content:\"\"", i) + } + } + } + + // user + function_call + function_call_output + if len(items) != 3 { + t.Errorf("expected 3 input items, got %d", len(items)) + } +} + +// Every function_call_output must have a matching function_call by call_id. +func TestCallIDsMatchBetweenCallAndOutput(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Multi-tool"}, + { + "role": "assistant", + "content": null, + "tool_calls": [ + {"id": "id_a", "type": "function", "function": {"name": "tool_a", "arguments": "{}"}}, + {"id": "id_b", "type": "function", "function": {"name": "tool_b", "arguments": "{}"}} + ] + }, + {"role": "tool", "tool_call_id": "id_a", "content": "res_a"}, + {"role": "tool", "tool_call_id": "id_b", "content": "res_b"} + ], + "tools": [ + {"type": "function", "function": {"name": "tool_a", "description": "A", "parameters": {"type": "object", "properties": {}}}}, + {"type": "function", "function": {"name": "tool_b", "description": "B", "parameters": {"type": "object", "properties": {}}}} + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + items := gjson.Get(result, "input").Array() + + // collect call_ids from function_call items + callIDs := make(map[string]bool) + for _, item := range items { + if item.Get("type").String() == "function_call" { + callIDs[item.Get("call_id").String()] = true + } + } + + for i, item := range items { + if item.Get("type").String() == "function_call_output" { + outID := item.Get("call_id").String() + if !callIDs[outID] { + t.Errorf("item %d: function_call_output has call_id '%s' with no matching function_call", i, outID) + } + } + } + + // 2 calls, 2 outputs + funcCallCount := 0 + funcOutputCount := 0 + for _, item := range items { + switch item.Get("type").String() { + case "function_call": + funcCallCount++ + case "function_call_output": + funcOutputCount++ + } + } + if funcCallCount != 2 { + t.Errorf("expected 2 function_calls, got %d", funcCallCount) + } + if funcOutputCount != 2 { + t.Errorf("expected 2 function_call_outputs, got %d", funcOutputCount) + } +} + +// Tools array should carry over to the Responses format output. +func TestToolsDefinitionTranslated(t *testing.T) { + input := []byte(`{ + "model": "gpt-4o", + "messages": [ + {"role": "user", "content": "Hi"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "search", + "description": "Search the web", + "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]} + } + } + ] + }`) + + out := ConvertOpenAIRequestToCodex("gpt-4o", input, true) + result := string(out) + + tools := gjson.Get(result, "tools").Array() + if len(tools) == 0 { + t.Fatal("no tools found in output") + } + + // look for "search" tool + found := false + for _, tool := range tools { + name := tool.Get("name").String() + if name == "" { + name = tool.Get("function.name").String() + } + if strings.Contains(name, "search") { + found = true + break + } + } + if !found { + t.Errorf("tool 'search' not found in output tools: %s", gjson.Get(result, "tools").Raw) + } +} From 0b94d36c4a8fc25f3536ed2f98aa5b9adeefa37d Mon Sep 17 00:00:00 2001 From: Muran-prog Date: Sat, 14 Mar 2026 21:45:28 +0200 Subject: [PATCH 2/2] test: use exact match for tool name assertion Address review feedback - drop function.name fallback and strings.Contains in favor of direct == comparison. --- .../openai/chat-completions/codex_openai_request_test.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go b/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go index 9ce52e59..84c8dad2 100644 --- a/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go +++ b/internal/translator/codex/openai/chat-completions/codex_openai_request_test.go @@ -1,7 +1,6 @@ package chat_completions import ( - "strings" "testing" "github.com/tidwall/gjson" @@ -623,14 +622,9 @@ func TestToolsDefinitionTranslated(t *testing.T) { t.Fatal("no tools found in output") } - // look for "search" tool found := false for _, tool := range tools { - name := tool.Get("name").String() - if name == "" { - name = tool.Get("function.name").String() - } - if strings.Contains(name, "search") { + if tool.Get("name").String() == "search" { found = true break }