From 671558a82260ad056728ba3b22da4614a8d04a47 Mon Sep 17 00:00:00 2001 From: TinyCoder Date: Wed, 24 Dec 2025 16:16:22 +0700 Subject: [PATCH 1/4] fix(openai): add index field to image response for LiteLLM compatibility LiteLLM's Pydantic model requires an index field in each image object. Without it, responses fail validation with "images.0.index Field required". --- .../antigravity_openai_response.go | 16 ++++++++-- .../gemini-cli_openai_response.go | 16 ++++++++-- .../gemini_openai_response.go | 31 +++++++++++++++---- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go index 7282ebc8..9dae687a 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go @@ -8,6 +8,7 @@ package chat_completions import ( "bytes" "context" + "encoding/json" "fmt" "strings" "sync/atomic" @@ -181,14 +182,23 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq mimeType = "image/png" } imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) - imagePayload := `{"image_url":{"url":""},"type":"image_url"}` - imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) imagesResult := gjson.Get(template, "choices.0.delta.images") if !imagesResult.Exists() || !imagesResult.IsArray() { template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) } + imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array()) + imagePayload, err := json.Marshal(map[string]any{ + "index": imageIndex, + "type": "image_url", + "image_url": map[string]string{ + "url": imageURL, + }, + }) + if err != nil { + continue + } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") - template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload) + template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload)) } } } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go index 2b3ac37e..f02d3658 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go @@ -8,6 +8,7 @@ package chat_completions import ( "bytes" "context" + "encoding/json" "fmt" "strings" "sync/atomic" @@ -170,14 +171,23 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ mimeType = "image/png" } imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) - imagePayload := `{"image_url":{"url":""},"type":"image_url"}` - imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) imagesResult := gjson.Get(template, "choices.0.delta.images") if !imagesResult.Exists() || !imagesResult.IsArray() { template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) } + imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array()) + imagePayload, err := json.Marshal(map[string]any{ + "index": imageIndex, + "type": "image_url", + "image_url": map[string]string{ + "url": imageURL, + }, + }) + if err != nil { + continue + } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") - template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload) + template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload)) } } } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go index d710b1d6..d0220105 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go @@ -8,6 +8,7 @@ package chat_completions import ( "bytes" "context" + "encoding/json" "fmt" "strings" "sync/atomic" @@ -182,14 +183,23 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR mimeType = "image/png" } imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) - imagePayload := `{"image_url":{"url":""},"type":"image_url"}` - imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) imagesResult := gjson.Get(template, "choices.0.delta.images") if !imagesResult.Exists() || !imagesResult.IsArray() { template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) } + imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array()) + imagePayload, err := json.Marshal(map[string]any{ + "index": imageIndex, + "type": "image_url", + "image_url": map[string]string{ + "url": imageURL, + }, + }) + if err != nil { + continue + } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") - template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload) + template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload)) } } } @@ -316,14 +326,23 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina mimeType = "image/png" } imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) - imagePayload := `{"image_url":{"url":""},"type":"image_url"}` - imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) imagesResult := gjson.Get(template, "choices.0.message.images") if !imagesResult.Exists() || !imagesResult.IsArray() { template, _ = sjson.SetRaw(template, "choices.0.message.images", `[]`) } + imageIndex := len(gjson.Get(template, "choices.0.message.images").Array()) + imagePayload, err := json.Marshal(map[string]any{ + "index": imageIndex, + "type": "image_url", + "image_url": map[string]string{ + "url": imageURL, + }, + }) + if err != nil { + continue + } template, _ = sjson.Set(template, "choices.0.message.role", "assistant") - template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", imagePayload) + template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", string(imagePayload)) } } } From a7fc2ee4cf4c778907549835d7f4ec71d0c4e32e Mon Sep 17 00:00:00 2001 From: TinyCoder Date: Thu, 25 Dec 2025 14:21:01 +0700 Subject: [PATCH 2/4] refactor(image): avoid using json.Marshal --- .../antigravity_openai_response.go | 16 +++------- .../gemini-cli_openai_response.go | 16 +++------- .../gemini_openai_response.go | 31 +++++-------------- 3 files changed, 16 insertions(+), 47 deletions(-) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go index 9dae687a..81002cd4 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go @@ -8,7 +8,6 @@ package chat_completions import ( "bytes" "context" - "encoding/json" "fmt" "strings" "sync/atomic" @@ -187,18 +186,11 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) } imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array()) - imagePayload, err := json.Marshal(map[string]any{ - "index": imageIndex, - "type": "image_url", - "image_url": map[string]string{ - "url": imageURL, - }, - }) - if err != nil { - continue - } + imagePayload := `{"type":"image_url","image_url":{"url":""}}` + imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex) + imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") - template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload)) + template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload) } } } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go index f02d3658..5a1faf51 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go @@ -8,7 +8,6 @@ package chat_completions import ( "bytes" "context" - "encoding/json" "fmt" "strings" "sync/atomic" @@ -176,18 +175,11 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) } imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array()) - imagePayload, err := json.Marshal(map[string]any{ - "index": imageIndex, - "type": "image_url", - "image_url": map[string]string{ - "url": imageURL, - }, - }) - if err != nil { - continue - } + imagePayload := `{"type":"image_url","image_url":{"url":""}}` + imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex) + imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") - template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload)) + template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload) } } } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go index d0220105..56995888 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go @@ -8,7 +8,6 @@ package chat_completions import ( "bytes" "context" - "encoding/json" "fmt" "strings" "sync/atomic" @@ -188,18 +187,11 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) } imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array()) - imagePayload, err := json.Marshal(map[string]any{ - "index": imageIndex, - "type": "image_url", - "image_url": map[string]string{ - "url": imageURL, - }, - }) - if err != nil { - continue - } + imagePayload := `{"type":"image_url","image_url":{"url":""}}` + imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex) + imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") - template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload)) + template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload) } } } @@ -331,18 +323,11 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina template, _ = sjson.SetRaw(template, "choices.0.message.images", `[]`) } imageIndex := len(gjson.Get(template, "choices.0.message.images").Array()) - imagePayload, err := json.Marshal(map[string]any{ - "index": imageIndex, - "type": "image_url", - "image_url": map[string]string{ - "url": imageURL, - }, - }) - if err != nil { - continue - } + imagePayload := `{"type":"image_url","image_url":{"url":""}}` + imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex) + imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) template, _ = sjson.Set(template, "choices.0.message.role", "assistant") - template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", string(imagePayload)) + template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", imagePayload) } } } From 54f71aa2732a625cf3e2baa02749fccee914a6f6 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Hung Date: Thu, 25 Dec 2025 21:55:35 +0700 Subject: [PATCH 3/4] fix(test): remove extra argument from ExecuteStreamWithAuthManager call --- sdk/api/handlers/handlers_stream_bootstrap_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/api/handlers/handlers_stream_bootstrap_test.go b/sdk/api/handlers/handlers_stream_bootstrap_test.go index 7f910447..39eefa84 100644 --- a/sdk/api/handlers/handlers_stream_bootstrap_test.go +++ b/sdk/api/handlers/handlers_stream_bootstrap_test.go @@ -99,7 +99,7 @@ func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) { Streaming: sdkconfig.StreamingConfig{ BootstrapRetries: &bootstrapRetries, }, - }, manager, nil) + }, manager) dataChan, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "") if dataChan == nil || errChan == nil { t.Fatalf("expected non-nil channels") From 33e53a2a566cef8af25f96fc7c8173bb9a199f26 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Fri, 26 Dec 2025 05:01:45 +0800 Subject: [PATCH 4/4] fix(translators): ensure correct handling and output of multimodal assistant content across request handlers --- .../antigravity_openai_request.go | 24 +++++++++++++++++- .../gemini-cli_openai_request.go | 25 ++++++++++++++++++- .../chat-completions/gemini_openai_request.go | 6 ++--- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 573b8d45..ecabce95 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -249,8 +249,28 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ p := 0 if content.Type == gjson.String { node, _ = sjson.SetBytes(node, "parts.-1.text", content.String()) - out, _ = sjson.SetRawBytes(out, "request.contents.-1", node) p++ + } else if content.IsArray() { + // Assistant multimodal content (e.g. text + image) -> single model content with parts + for _, item := range content.Array() { + switch item.Get("type").String() { + case "text": + p++ + case "image_url": + // If the assistant returned an inline data URL, preserve it for history fidelity. + imageURL := item.Get("image_url.url").String() + if len(imageURL) > 5 { // expect data:... + pieces := strings.SplitN(imageURL[5:], ";", 2) + if len(pieces) == 2 && len(pieces[1]) > 7 { + mime := pieces[0] + data := pieces[1][7:] + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime) + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data) + p++ + } + } + } + } } // Tool calls -> single model content with functionCall parts @@ -305,6 +325,8 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ if pp > 0 { out, _ = sjson.SetRawBytes(out, "request.contents.-1", toolNode) } + } else { + out, _ = sjson.SetRawBytes(out, "request.contents.-1", node) } } } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index feb80f65..e1d1a40b 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -218,8 +218,29 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo if content.Type == gjson.String { // Assistant text -> single model content node, _ = sjson.SetBytes(node, "parts.-1.text", content.String()) - out, _ = sjson.SetRawBytes(out, "request.contents.-1", node) p++ + } else if content.IsArray() { + // Assistant multimodal content (e.g. text + image) -> single model content with parts + for _, item := range content.Array() { + switch item.Get("type").String() { + case "text": + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) + p++ + case "image_url": + // If the assistant returned an inline data URL, preserve it for history fidelity. + imageURL := item.Get("image_url.url").String() + if len(imageURL) > 5 { // expect data:... + pieces := strings.SplitN(imageURL[5:], ";", 2) + if len(pieces) == 2 && len(pieces[1]) > 7 { + mime := pieces[0] + data := pieces[1][7:] + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime) + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data) + p++ + } + } + } + } } // Tool calls -> single model content with functionCall parts @@ -260,6 +281,8 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo if pp > 0 { out, _ = sjson.SetRawBytes(out, "request.contents.-1", toolNode) } + } else { + out, _ = sjson.SetRawBytes(out, "request.contents.-1", node) } } } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 7b8c5c68..f0902b38 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -233,18 +233,15 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } else if role == "assistant" { node := []byte(`{"role":"model","parts":[]}`) p := 0 - if content.Type == gjson.String { // Assistant text -> single model content node, _ = sjson.SetBytes(node, "parts.-1.text", content.String()) - out, _ = sjson.SetRawBytes(out, "contents.-1", node) p++ } else if content.IsArray() { // Assistant multimodal content (e.g. text + image) -> single model content with parts for _, item := range content.Array() { switch item.Get("type").String() { case "text": - node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) p++ case "image_url": // If the assistant returned an inline data URL, preserve it for history fidelity. @@ -261,7 +258,6 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } } } - out, _ = sjson.SetRawBytes(out, "contents.-1", node) } // Tool calls -> single model content with functionCall parts @@ -302,6 +298,8 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) if pp > 0 { out, _ = sjson.SetRawBytes(out, "contents.-1", toolNode) } + } else { + out, _ = sjson.SetRawBytes(out, "contents.-1", node) } } }