diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 573b8d45..ecabce95 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -249,8 +249,28 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ p := 0 if content.Type == gjson.String { node, _ = sjson.SetBytes(node, "parts.-1.text", content.String()) - out, _ = sjson.SetRawBytes(out, "request.contents.-1", node) p++ + } else if content.IsArray() { + // Assistant multimodal content (e.g. text + image) -> single model content with parts + for _, item := range content.Array() { + switch item.Get("type").String() { + case "text": + p++ + case "image_url": + // If the assistant returned an inline data URL, preserve it for history fidelity. + imageURL := item.Get("image_url.url").String() + if len(imageURL) > 5 { // expect data:... + pieces := strings.SplitN(imageURL[5:], ";", 2) + if len(pieces) == 2 && len(pieces[1]) > 7 { + mime := pieces[0] + data := pieces[1][7:] + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime) + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data) + p++ + } + } + } + } } // Tool calls -> single model content with functionCall parts @@ -305,6 +325,8 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ if pp > 0 { out, _ = sjson.SetRawBytes(out, "request.contents.-1", toolNode) } + } else { + out, _ = sjson.SetRawBytes(out, "request.contents.-1", node) } } } diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go index 5f88090b..1b7866d0 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go @@ -181,12 +181,14 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq mimeType = "image/png" } imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) - imagePayload := `{"image_url":{"url":""},"type":"image_url"}` - imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) imagesResult := gjson.Get(template, "choices.0.delta.images") if !imagesResult.Exists() || !imagesResult.IsArray() { template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) } + imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array()) + imagePayload := `{"type":"image_url","image_url":{"url":""}}` + imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex) + imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload) } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index feb80f65..e1d1a40b 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -218,8 +218,29 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo if content.Type == gjson.String { // Assistant text -> single model content node, _ = sjson.SetBytes(node, "parts.-1.text", content.String()) - out, _ = sjson.SetRawBytes(out, "request.contents.-1", node) p++ + } else if content.IsArray() { + // Assistant multimodal content (e.g. text + image) -> single model content with parts + for _, item := range content.Array() { + switch item.Get("type").String() { + case "text": + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) + p++ + case "image_url": + // If the assistant returned an inline data URL, preserve it for history fidelity. + imageURL := item.Get("image_url.url").String() + if len(imageURL) > 5 { // expect data:... + pieces := strings.SplitN(imageURL[5:], ";", 2) + if len(pieces) == 2 && len(pieces[1]) > 7 { + mime := pieces[0] + data := pieces[1][7:] + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime) + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data) + p++ + } + } + } + } } // Tool calls -> single model content with functionCall parts @@ -260,6 +281,8 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo if pp > 0 { out, _ = sjson.SetRawBytes(out, "request.contents.-1", toolNode) } + } else { + out, _ = sjson.SetRawBytes(out, "request.contents.-1", node) } } } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go index 2b3ac37e..5a1faf51 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go @@ -170,12 +170,14 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ mimeType = "image/png" } imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) - imagePayload := `{"image_url":{"url":""},"type":"image_url"}` - imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) imagesResult := gjson.Get(template, "choices.0.delta.images") if !imagesResult.Exists() || !imagesResult.IsArray() { template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) } + imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array()) + imagePayload := `{"type":"image_url","image_url":{"url":""}}` + imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex) + imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload) } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 7b8c5c68..f0902b38 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -233,18 +233,15 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } else if role == "assistant" { node := []byte(`{"role":"model","parts":[]}`) p := 0 - if content.Type == gjson.String { // Assistant text -> single model content node, _ = sjson.SetBytes(node, "parts.-1.text", content.String()) - out, _ = sjson.SetRawBytes(out, "contents.-1", node) p++ } else if content.IsArray() { // Assistant multimodal content (e.g. text + image) -> single model content with parts for _, item := range content.Array() { switch item.Get("type").String() { case "text": - node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) p++ case "image_url": // If the assistant returned an inline data URL, preserve it for history fidelity. @@ -261,7 +258,6 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } } } - out, _ = sjson.SetRawBytes(out, "contents.-1", node) } // Tool calls -> single model content with functionCall parts @@ -302,6 +298,8 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) if pp > 0 { out, _ = sjson.SetRawBytes(out, "contents.-1", toolNode) } + } else { + out, _ = sjson.SetRawBytes(out, "contents.-1", node) } } } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go index 39e61fde..52fbba43 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go @@ -182,12 +182,14 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR mimeType = "image/png" } imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) - imagePayload := `{"image_url":{"url":""},"type":"image_url"}` - imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) imagesResult := gjson.Get(template, "choices.0.delta.images") if !imagesResult.Exists() || !imagesResult.IsArray() { template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) } + imageIndex := len(gjson.Get(template, "choices.0.delta.images").Array()) + imagePayload := `{"type":"image_url","image_url":{"url":""}}` + imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex) + imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", imagePayload) } @@ -316,12 +318,14 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina mimeType = "image/png" } imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) - imagePayload := `{"image_url":{"url":""},"type":"image_url"}` - imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) imagesResult := gjson.Get(template, "choices.0.message.images") if !imagesResult.Exists() || !imagesResult.IsArray() { template, _ = sjson.SetRaw(template, "choices.0.message.images", `[]`) } + imageIndex := len(gjson.Get(template, "choices.0.message.images").Array()) + imagePayload := `{"type":"image_url","image_url":{"url":""}}` + imagePayload, _ = sjson.Set(imagePayload, "index", imageIndex) + imagePayload, _ = sjson.Set(imagePayload, "image_url.url", imageURL) template, _ = sjson.Set(template, "choices.0.message.role", "assistant") template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", imagePayload) } diff --git a/sdk/api/handlers/handlers_stream_bootstrap_test.go b/sdk/api/handlers/handlers_stream_bootstrap_test.go index 7f910447..39eefa84 100644 --- a/sdk/api/handlers/handlers_stream_bootstrap_test.go +++ b/sdk/api/handlers/handlers_stream_bootstrap_test.go @@ -99,7 +99,7 @@ func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) { Streaming: sdkconfig.StreamingConfig{ BootstrapRetries: &bootstrapRetries, }, - }, manager, nil) + }, manager) dataChan, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "") if dataChan == nil || errChan == nil { t.Fatalf("expected non-nil channels")