From c5d7137d663c0c8a2f5c32a4ba27527482b413b2 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:50:43 +0800 Subject: [PATCH 1/4] feat(translator): Pass through imageConfig --- .../openai/chat-completions/cli_openai_request.go | 6 ++++++ .../gemini/openai/chat-completions/gemini_openai_request.go | 6 ++++++ .../openai/responses/gemini_openai-responses_request.go | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go index 6ed48486..c69ecf30 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go @@ -79,6 +79,12 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num) } + // Image config passthrough (e.g., aspectRatio) + // If the input carries generationConfig.imageConfig, preserve it in the Gemini CLI request. + if imgCfg := gjson.GetBytes(rawJSON, "generationConfig.imageConfig"); imgCfg.Exists() && imgCfg.IsObject() { + out, _ = sjson.SetRawBytes(out, "request.generationConfig.imageConfig", []byte(imgCfg.Raw)) + } + // messages -> systemInstruction + contents messages := gjson.GetBytes(rawJSON, "messages") if messages.IsArray() { diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 447bba31..4c5111e7 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -79,6 +79,12 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetBytes(out, "generationConfig.topK", tkr.Num) } + // Image config passthrough (e.g., aspectRatio) + // If the input carries generationConfig.imageConfig, preserve it in the Gemini request. + if imgCfg := gjson.GetBytes(rawJSON, "generationConfig.imageConfig"); imgCfg.Exists() && imgCfg.IsObject() { + out, _ = sjson.SetRawBytes(out, "generationConfig.imageConfig", []byte(imgCfg.Raw)) + } + // messages -> systemInstruction + contents messages := gjson.GetBytes(rawJSON, "messages") if messages.IsArray() { diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index af7923ab..9ed23bc5 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -262,5 +262,11 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte } } + // Image config passthrough (e.g., aspectRatio) + // If the input carries generationConfig.imageConfig, preserve it in the Gemini request. + if imgCfg := root.Get("generationConfig.imageConfig"); imgCfg.Exists() && imgCfg.IsObject() { + out, _ = sjson.SetRaw(out, "generationConfig.imageConfig", imgCfg.Raw) + } + return []byte(out) } From a602ae859bc6cd19d44dd4f7fde4efafc99db692 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 9 Oct 2025 15:47:06 +0800 Subject: [PATCH 2/4] feat(translator): Add support for openrouter image_config --- .../openai/chat-completions/cli_openai_request.go | 8 ++++++++ .../openai/chat-completions/gemini_openai_request.go | 8 ++++++++ .../openai/responses/gemini_openai-responses_request.go | 7 +++++++ 3 files changed, 23 insertions(+) diff --git a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go index c69ecf30..62231662 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go @@ -85,6 +85,14 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetRawBytes(out, "request.generationConfig.imageConfig", []byte(imgCfg.Raw)) } + // OpenRouter-style image_config (snake_case) support + // If the input uses top-level image_config.aspect_ratio, map it into request.generationConfig.imageConfig.aspectRatio. + + if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() { + if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String { + out, _ = sjson.SetBytes(out, "request.generationConfig.imageConfig.aspectRatio", ar.Str) + } + } // messages -> systemInstruction + contents messages := gjson.GetBytes(rawJSON, "messages") if messages.IsArray() { diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 4c5111e7..77ef7dfa 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -85,6 +85,14 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetRawBytes(out, "generationConfig.imageConfig", []byte(imgCfg.Raw)) } + // OpenRouter-style image_config (snake_case) support + // If the input uses top-level image_config.aspect_ratio, map it into generationConfig.imageConfig.aspectRatio. + if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() { + if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String { + out, _ = sjson.SetBytes(out, "generationConfig.imageConfig.aspectRatio", ar.Str) + } + } + // messages -> systemInstruction + contents messages := gjson.GetBytes(rawJSON, "messages") if messages.IsArray() { diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 9ed23bc5..3bc2ec98 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -268,5 +268,12 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte out, _ = sjson.SetRaw(out, "generationConfig.imageConfig", imgCfg.Raw) } + // OpenRouter-style image_config (snake_case) support at top-level + if imgCfg := root.Get("image_config"); imgCfg.Exists() && imgCfg.IsObject() { + if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String { + out, _ = sjson.Set(out, "generationConfig.imageConfig.aspectRatio", ar.String()) + } + } + return []byte(out) } From d1933075c30ef1ee980d829b5636351e1a54915b Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 9 Oct 2025 16:35:08 +0800 Subject: [PATCH 3/4] Revert "feat(translator): Pass through imageConfig" --- .../openai/chat-completions/cli_openai_request.go | 10 ++-------- .../openai/chat-completions/gemini_openai_request.go | 8 +------- .../responses/gemini_openai-responses_request.go | 8 +------- 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go index 62231662..3d1152f5 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go @@ -79,20 +79,14 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num) } - // Image config passthrough (e.g., aspectRatio) - // If the input carries generationConfig.imageConfig, preserve it in the Gemini CLI request. - if imgCfg := gjson.GetBytes(rawJSON, "generationConfig.imageConfig"); imgCfg.Exists() && imgCfg.IsObject() { - out, _ = sjson.SetRawBytes(out, "request.generationConfig.imageConfig", []byte(imgCfg.Raw)) - } - - // OpenRouter-style image_config (snake_case) support + // OpenRouter-style image_config support // If the input uses top-level image_config.aspect_ratio, map it into request.generationConfig.imageConfig.aspectRatio. - if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() { if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String { out, _ = sjson.SetBytes(out, "request.generationConfig.imageConfig.aspectRatio", ar.Str) } } + // messages -> systemInstruction + contents messages := gjson.GetBytes(rawJSON, "messages") if messages.IsArray() { diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 77ef7dfa..075d2b55 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -79,13 +79,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetBytes(out, "generationConfig.topK", tkr.Num) } - // Image config passthrough (e.g., aspectRatio) - // If the input carries generationConfig.imageConfig, preserve it in the Gemini request. - if imgCfg := gjson.GetBytes(rawJSON, "generationConfig.imageConfig"); imgCfg.Exists() && imgCfg.IsObject() { - out, _ = sjson.SetRawBytes(out, "generationConfig.imageConfig", []byte(imgCfg.Raw)) - } - - // OpenRouter-style image_config (snake_case) support + // OpenRouter-style image_config support // If the input uses top-level image_config.aspect_ratio, map it into generationConfig.imageConfig.aspectRatio. if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() { if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String { diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 3bc2ec98..842758ad 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -262,13 +262,7 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte } } - // Image config passthrough (e.g., aspectRatio) - // If the input carries generationConfig.imageConfig, preserve it in the Gemini request. - if imgCfg := root.Get("generationConfig.imageConfig"); imgCfg.Exists() && imgCfg.IsObject() { - out, _ = sjson.SetRaw(out, "generationConfig.imageConfig", imgCfg.Raw) - } - - // OpenRouter-style image_config (snake_case) support at top-level + // OpenRouter-style image_config support at top-level if imgCfg := root.Get("image_config"); imgCfg.Exists() && imgCfg.IsObject() { if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String { out, _ = sjson.Set(out, "generationConfig.imageConfig.aspectRatio", ar.String()) From 01be94a0de61c718fe0b4119d9ad9446735d011a Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 9 Oct 2025 19:38:07 +0800 Subject: [PATCH 4/4] feat(translator): Map OpenAI modalities to Gemini responseModalities --- .../chat-completions/cli_openai_request.go | 17 +++++++++++++++++ .../chat-completions/gemini_openai_request.go | 17 +++++++++++++++++ .../gemini_openai-responses_request.go | 7 ------- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go index 3d1152f5..87e3ce0a 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go @@ -79,6 +79,23 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num) } + // Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities + // e.g. "modalities": ["image", "text"] -> ["Image", "Text"] + if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() { + var responseMods []string + for _, m := range mods.Array() { + switch strings.ToLower(m.String()) { + case "text": + responseMods = append(responseMods, "Text") + case "image": + responseMods = append(responseMods, "Image") + } + } + if len(responseMods) > 0 { + out, _ = sjson.SetBytes(out, "request.generationConfig.responseModalities", responseMods) + } + } + // OpenRouter-style image_config support // If the input uses top-level image_config.aspect_ratio, map it into request.generationConfig.imageConfig.aspectRatio. if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() { diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 075d2b55..41476a02 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -79,6 +79,23 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetBytes(out, "generationConfig.topK", tkr.Num) } + // Map OpenAI modalities -> Gemini generationConfig.responseModalities + // e.g. "modalities": ["image", "text"] -> ["Image", "Text"] + if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() { + var responseMods []string + for _, m := range mods.Array() { + switch strings.ToLower(m.String()) { + case "text": + responseMods = append(responseMods, "Text") + case "image": + responseMods = append(responseMods, "Image") + } + } + if len(responseMods) > 0 { + out, _ = sjson.SetBytes(out, "generationConfig.responseModalities", responseMods) + } + } + // OpenRouter-style image_config support // If the input uses top-level image_config.aspect_ratio, map it into generationConfig.imageConfig.aspectRatio. if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() { diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 842758ad..af7923ab 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -262,12 +262,5 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte } } - // OpenRouter-style image_config support at top-level - if imgCfg := root.Get("image_config"); imgCfg.Exists() && imgCfg.IsObject() { - if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String { - out, _ = sjson.Set(out, "generationConfig.imageConfig.aspectRatio", ar.String()) - } - } - return []byte(out) }