diff --git a/internal/client/gemini-web_client.go b/internal/client/gemini-web_client.go index 44f3224b..2a1aa37c 100644 --- a/internal/client/gemini-web_client.go +++ b/internal/client/gemini-web_client.go @@ -207,7 +207,7 @@ func (c *GeminiWebClient) registerModelsOnce() { if c.modelsRegistered { return } - c.RegisterModels(GEMINI, geminiWeb.GetGeminiWebAliasedModels()) + c.RegisterModels(GEMINIWEB, geminiWeb.GetGeminiWebAliasedModels()) c.modelsRegistered = true } @@ -219,8 +219,8 @@ func (c *GeminiWebClient) EnsureRegistered() { } } -func (c *GeminiWebClient) Type() string { return GEMINI } -func (c *GeminiWebClient) Provider() string { return GEMINI } +func (c *GeminiWebClient) Type() string { return GEMINIWEB } +func (c *GeminiWebClient) Provider() string { return GEMINIWEB } func (c *GeminiWebClient) CanProvideModel(modelName string) bool { geminiWeb.EnsureGeminiWebAliasMap() _, ok := geminiWeb.GeminiWebAliasMap[strings.ToLower(modelName)] diff --git a/internal/constant/constant.go b/internal/constant/constant.go index 4e39d93f..bfa7558d 100644 --- a/internal/constant/constant.go +++ b/internal/constant/constant.go @@ -3,6 +3,7 @@ package constant const ( GEMINI = "gemini" GEMINICLI = "gemini-cli" + GEMINIWEB = "gemini-web" CODEX = "codex" CLAUDE = "claude" OPENAI = "openai" diff --git a/internal/translator/gemini-web/openai/chat-completions/init.go b/internal/translator/gemini-web/openai/chat-completions/init.go new file mode 100644 index 00000000..9384bd04 --- /dev/null +++ b/internal/translator/gemini-web/openai/chat-completions/init.go @@ -0,0 +1,20 @@ +package chat_completions + +import ( + . "github.com/luispater/CLIProxyAPI/v5/internal/constant" + "github.com/luispater/CLIProxyAPI/v5/internal/interfaces" + geminiChat "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/chat-completions" + "github.com/luispater/CLIProxyAPI/v5/internal/translator/translator" +) + +func init() { + translator.Register( + OPENAI, + GEMINIWEB, + geminiChat.ConvertOpenAIRequestToGemini, + interfaces.TranslateResponse{ + Stream: geminiChat.ConvertGeminiResponseToOpenAI, + NonStream: geminiChat.ConvertGeminiResponseToOpenAINonStream, + }, + ) +} diff --git a/internal/translator/gemini-web/openai/responses/init.go b/internal/translator/gemini-web/openai/responses/init.go new file mode 100644 index 00000000..c7ed6149 --- /dev/null +++ b/internal/translator/gemini-web/openai/responses/init.go @@ -0,0 +1,20 @@ +package responses + +import ( + . "github.com/luispater/CLIProxyAPI/v5/internal/constant" + "github.com/luispater/CLIProxyAPI/v5/internal/interfaces" + geminiResponses "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/responses" + "github.com/luispater/CLIProxyAPI/v5/internal/translator/translator" +) + +func init() { + translator.Register( + OPENAI_RESPONSE, + GEMINIWEB, + geminiResponses.ConvertOpenAIResponsesRequestToGemini, + interfaces.TranslateResponse{ + Stream: geminiResponses.ConvertGeminiResponseToOpenAIResponses, + NonStream: geminiResponses.ConvertGeminiResponseToOpenAIResponsesNonStream, + }, + ) +} diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 6e842ab2..97320333 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -170,6 +170,31 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) node := []byte(`{"role":"model","parts":[{"text":""}]}`) node, _ = sjson.SetBytes(node, "parts.0.text", content.String()) out, _ = sjson.SetRawBytes(out, "contents.-1", node) + } else if content.IsArray() { + // Assistant multimodal content (e.g. text + image) -> single model content with parts + node := []byte(`{"role":"model","parts":[]}`) + p := 0 + for _, item := range content.Array() { + switch item.Get("type").String() { + case "text": + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String()) + p++ + case "image_url": + // If the assistant returned an inline data URL, preserve it for history fidelity. + imageURL := item.Get("image_url.url").String() + if len(imageURL) > 5 { // expect data:... + pieces := strings.SplitN(imageURL[5:], ";", 2) + if len(pieces) == 2 && len(pieces[1]) > 7 { + mime := pieces[0] + data := pieces[1][7:] + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime) + node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data) + p++ + } + } + } + } + out, _ = sjson.SetRawBytes(out, "contents.-1", node) } else if !content.Exists() || content.Type == gjson.Null { // Tool calls -> single model content with functionCall parts tcs := m.Get("tool_calls") diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go index 420812cb..f7c23b78 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go @@ -8,6 +8,7 @@ package chat_completions import ( "bytes" "context" + "encoding/json" "fmt" "time" @@ -99,6 +100,10 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR partResult := partResults[i] partTextResult := partResult.Get("text") functionCallResult := partResult.Get("functionCall") + inlineDataResult := partResult.Get("inlineData") + if !inlineDataResult.Exists() { + inlineDataResult = partResult.Get("inline_data") + } if partTextResult.Exists() { // Handle text content, distinguishing between regular content and reasoning/thoughts. @@ -124,6 +129,34 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR } template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate) + } else if inlineDataResult.Exists() { + data := inlineDataResult.Get("data").String() + if data == "" { + continue + } + mimeType := inlineDataResult.Get("mimeType").String() + if mimeType == "" { + mimeType = inlineDataResult.Get("mime_type").String() + } + if mimeType == "" { + mimeType = "image/png" + } + imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) + imagePayload, err := json.Marshal(map[string]any{ + "type": "image_url", + "image_url": map[string]string{ + "url": imageURL, + }, + }) + if err != nil { + continue + } + imagesResult := gjson.Get(template, "choices.0.delta.images") + if !imagesResult.Exists() || !imagesResult.IsArray() { + template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`) + } + template, _ = sjson.Set(template, "choices.0.delta.role", "assistant") + template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload)) } } } @@ -193,6 +226,10 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina partResult := partsResults[i] partTextResult := partResult.Get("text") functionCallResult := partResult.Get("functionCall") + inlineDataResult := partResult.Get("inlineData") + if !inlineDataResult.Exists() { + inlineDataResult = partResult.Get("inline_data") + } if partTextResult.Exists() { // Append text content, distinguishing between regular content and reasoning. @@ -217,9 +254,34 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina } template, _ = sjson.Set(template, "choices.0.message.role", "assistant") template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls.-1", functionCallItemTemplate) - } else { - // If no usable content is found, return an empty string. - return "" + } else if inlineDataResult.Exists() { + data := inlineDataResult.Get("data").String() + if data == "" { + continue + } + mimeType := inlineDataResult.Get("mimeType").String() + if mimeType == "" { + mimeType = inlineDataResult.Get("mime_type").String() + } + if mimeType == "" { + mimeType = "image/png" + } + imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data) + imagePayload, err := json.Marshal(map[string]any{ + "type": "image_url", + "image_url": map[string]string{ + "url": imageURL, + }, + }) + if err != nil { + continue + } + imagesResult := gjson.Get(template, "choices.0.message.images") + if !imagesResult.Exists() || !imagesResult.IsArray() { + template, _ = sjson.SetRaw(template, "choices.0.message.images", `[]`) + } + template, _ = sjson.Set(template, "choices.0.message.role", "assistant") + template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", string(imagePayload)) } } } diff --git a/internal/translator/init.go b/internal/translator/init.go index 4905fc1f..f54db620 100644 --- a/internal/translator/init.go +++ b/internal/translator/init.go @@ -23,6 +23,9 @@ import ( _ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/chat-completions" _ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/responses" + _ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini-web/openai/chat-completions" + _ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini-web/openai/responses" + _ "github.com/luispater/CLIProxyAPI/v5/internal/translator/openai/claude" _ "github.com/luispater/CLIProxyAPI/v5/internal/translator/openai/gemini" _ "github.com/luispater/CLIProxyAPI/v5/internal/translator/openai/gemini-cli"