diff --git a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go index 6ed48486..87e3ce0a 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/cli_openai_request.go @@ -79,6 +79,31 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num) } + // Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities + // e.g. "modalities": ["image", "text"] -> ["Image", "Text"] + if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() { + var responseMods []string + for _, m := range mods.Array() { + switch strings.ToLower(m.String()) { + case "text": + responseMods = append(responseMods, "Text") + case "image": + responseMods = append(responseMods, "Image") + } + } + if len(responseMods) > 0 { + out, _ = sjson.SetBytes(out, "request.generationConfig.responseModalities", responseMods) + } + } + + // OpenRouter-style image_config support + // If the input uses top-level image_config.aspect_ratio, map it into request.generationConfig.imageConfig.aspectRatio. + if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() { + if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String { + out, _ = sjson.SetBytes(out, "request.generationConfig.imageConfig.aspectRatio", ar.Str) + } + } + // messages -> systemInstruction + contents messages := gjson.GetBytes(rawJSON, "messages") if messages.IsArray() { diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 447bba31..41476a02 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -79,6 +79,31 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetBytes(out, "generationConfig.topK", tkr.Num) } + // Map OpenAI modalities -> Gemini generationConfig.responseModalities + // e.g. "modalities": ["image", "text"] -> ["Image", "Text"] + if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() { + var responseMods []string + for _, m := range mods.Array() { + switch strings.ToLower(m.String()) { + case "text": + responseMods = append(responseMods, "Text") + case "image": + responseMods = append(responseMods, "Image") + } + } + if len(responseMods) > 0 { + out, _ = sjson.SetBytes(out, "generationConfig.responseModalities", responseMods) + } + } + + // OpenRouter-style image_config support + // If the input uses top-level image_config.aspect_ratio, map it into generationConfig.imageConfig.aspectRatio. + if imgCfg := gjson.GetBytes(rawJSON, "image_config"); imgCfg.Exists() && imgCfg.IsObject() { + if ar := imgCfg.Get("aspect_ratio"); ar.Exists() && ar.Type == gjson.String { + out, _ = sjson.SetBytes(out, "generationConfig.imageConfig.aspectRatio", ar.Str) + } + } + // messages -> systemInstruction + contents messages := gjson.GetBytes(rawJSON, "messages") if messages.IsArray() {