diff --git a/config.example.yaml b/config.example.yaml index fe37a8f5..812c50ea 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -105,6 +105,10 @@ routing: # When true, enable authentication for the WebSocket API (/v1/ws). ws-auth: false +# When true, enable Gemini CLI internal endpoints (/v1internal:*). +# Default is false for safety. +enable-gemini-cli-endpoint: false + # When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts. nonstream-keepalive-interval: 0 diff --git a/internal/config/sdk_config.go b/internal/config/sdk_config.go index 9d99c924..aa27526d 100644 --- a/internal/config/sdk_config.go +++ b/internal/config/sdk_config.go @@ -9,6 +9,10 @@ type SDKConfig struct { // ProxyURL is the URL of an optional proxy server to use for outbound requests. ProxyURL string `yaml:"proxy-url" json:"proxy-url"` + // EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled. + // Default is false for safety; when false, /v1internal:* requests are rejected. + EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"` + // ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview") // to target prefixed credentials. When false, unprefixed model requests may use prefixed // credentials as well. diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index d263b40b..7b9fffc5 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -30,6 +30,8 @@ const ( qwenRateLimitWindow = time.Minute // sliding window duration ) +var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`) + // qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls. var qwenBeijingLoc = func() *time.Location { loc, err := time.LoadLocation("Asia/Shanghai") @@ -170,6 +172,42 @@ func timeUntilNextDay() time.Duration { return tomorrow.Sub(now) } +// ensureQwenSystemMessage prepends a default system message if none exists in "messages". +func ensureQwenSystemMessage(payload []byte) ([]byte, error) { + messages := gjson.GetBytes(payload, "messages") + if messages.Exists() && messages.IsArray() { + for _, msg := range messages.Array() { + if strings.EqualFold(msg.Get("role").String(), "system") { + return payload, nil + } + } + + var buf bytes.Buffer + buf.WriteByte('[') + buf.Write(qwenDefaultSystemMessage) + for _, msg := range messages.Array() { + buf.WriteByte(',') + buf.WriteString(msg.Raw) + } + buf.WriteByte(']') + updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes()) + if errSet != nil { + return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet) + } + return updated, nil + } + + var buf bytes.Buffer + buf.WriteByte('[') + buf.Write(qwenDefaultSystemMessage) + buf.WriteByte(']') + updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes()) + if errSet != nil { + return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet) + } + return updated, nil +} + // QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions. // If access token is unavailable, it falls back to legacy via ClientAdapter. type QwenExecutor struct { @@ -251,6 +289,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req requestedModel := helps.PayloadRequestedModel(opts, req.Model) body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + body, err = ensureQwenSystemMessage(body) + if err != nil { + return resp, err + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -357,15 +399,19 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut return nil, err } - toolsResult := gjson.GetBytes(body, "tools") + // toolsResult := gjson.GetBytes(body, "tools") // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response. // This will have no real consequences. It's just to scare Qwen3. - if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() { - body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`)) - } + // if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() { + // body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`)) + // } body, _ = sjson.SetBytes(body, "stream_options.include_usage", true) requestedModel := helps.PayloadRequestedModel(opts, req.Model) body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) + body, err = ensureQwenSystemMessage(body) + if err != nil { + return nil, err + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) diff --git a/sdk/api/handlers/gemini/gemini-cli_handlers.go b/sdk/api/handlers/gemini/gemini-cli_handlers.go index b5fd4943..4c5ddf80 100644 --- a/sdk/api/handlers/gemini/gemini-cli_handlers.go +++ b/sdk/api/handlers/gemini/gemini-cli_handlers.go @@ -9,6 +9,7 @@ import ( "context" "fmt" "io" + "net" "net/http" "strings" "time" @@ -49,7 +50,23 @@ func (h *GeminiCLIAPIHandler) Models() []map[string]any { // CLIHandler handles CLI-specific requests for Gemini API operations. // It restricts access to localhost only and routes requests to appropriate internal handlers. func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) { - if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") { + if h.Cfg == nil || !h.Cfg.EnableGeminiCLIEndpoint { + c.JSON(http.StatusForbidden, handlers.ErrorResponse{ + Error: handlers.ErrorDetail{ + Message: "Gemini CLI endpoint is disabled", + Type: "forbidden", + }, + }) + return + } + + requestHost := c.Request.Host + requestHostname := requestHost + if hostname, _, errSplitHostPort := net.SplitHostPort(requestHost); errSplitHostPort == nil { + requestHostname = hostname + } + + if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") || requestHostname != "127.0.0.1" { c.JSON(http.StatusForbidden, handlers.ErrorResponse{ Error: handlers.ErrorDetail{ Message: "CLI reply only allow local access",