diff --git a/config.example.yaml b/config.example.yaml
index fe37a8f5..812c50ea 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -105,6 +105,10 @@ routing:
 # When true, enable authentication for the WebSocket API (/v1/ws).
 ws-auth: false
 
+# When true, enable Gemini CLI internal endpoints (/v1internal:*).
+# Default is false for safety.
+enable-gemini-cli-endpoint: false
+
 # When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
 nonstream-keepalive-interval: 0
 
diff --git a/internal/config/sdk_config.go b/internal/config/sdk_config.go
index 9d99c924..aa27526d 100644
--- a/internal/config/sdk_config.go
+++ b/internal/config/sdk_config.go
@@ -9,6 +9,10 @@ type SDKConfig struct {
 	// ProxyURL is the URL of an optional proxy server to use for outbound requests.
 	ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
 
+	// EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled.
+	// Default is false for safety; when false, /v1internal:* requests are rejected.
+	EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"`
+
 	// ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview")
 	// to target prefixed credentials. When false, unprefixed model requests may use prefixed
 	// credentials as well.
diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index d263b40b..7b9fffc5 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -30,6 +30,8 @@ const (
 	qwenRateLimitWindow = time.Minute // sliding window duration
 )
 
+var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
+
 // qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
 var qwenBeijingLoc = func() *time.Location {
 	loc, err := time.LoadLocation("Asia/Shanghai")
@@ -170,6 +172,42 @@ func timeUntilNextDay() time.Duration {
 	return tomorrow.Sub(now)
 }
 
+// ensureQwenSystemMessage prepends a default system message if none exists in "messages".
+func ensureQwenSystemMessage(payload []byte) ([]byte, error) {
+	messages := gjson.GetBytes(payload, "messages")
+	if messages.Exists() && messages.IsArray() {
+		for _, msg := range messages.Array() {
+			if strings.EqualFold(msg.Get("role").String(), "system") {
+				return payload, nil
+			}
+		}
+
+		var buf bytes.Buffer
+		buf.WriteByte('[')
+		buf.Write(qwenDefaultSystemMessage)
+		for _, msg := range messages.Array() {
+			buf.WriteByte(',')
+			buf.WriteString(msg.Raw)
+		}
+		buf.WriteByte(']')
+		updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
+		if errSet != nil {
+			return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
+		}
+		return updated, nil
+	}
+
+	var buf bytes.Buffer
+	buf.WriteByte('[')
+	buf.Write(qwenDefaultSystemMessage)
+	buf.WriteByte(']')
+	updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
+	if errSet != nil {
+		return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
+	}
+	return updated, nil
+}
+
 // QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
 // If access token is unavailable, it falls back to legacy via ClientAdapter.
 type QwenExecutor struct {
@@ -251,6 +289,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 
 	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
 	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, err = ensureQwenSystemMessage(body)
+	if err != nil {
+		return resp, err
+	}
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -357,15 +399,19 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		return nil, err
 	}
 
-	toolsResult := gjson.GetBytes(body, "tools")
+	// toolsResult := gjson.GetBytes(body, "tools")
 	// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
 	// This will have no real consequences. It's just to scare Qwen3.
-	if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
-		body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
-	}
+	// if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
+	// 	body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
+	// }
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
 	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
 	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, err = ensureQwenSystemMessage(body)
+	if err != nil {
+		return nil, err
+	}
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
diff --git a/sdk/api/handlers/gemini/gemini-cli_handlers.go b/sdk/api/handlers/gemini/gemini-cli_handlers.go
index b5fd4943..4c5ddf80 100644
--- a/sdk/api/handlers/gemini/gemini-cli_handlers.go
+++ b/sdk/api/handlers/gemini/gemini-cli_handlers.go
@@ -9,6 +9,7 @@ import (
 	"context"
 	"fmt"
 	"io"
+	"net"
 	"net/http"
 	"strings"
 	"time"
@@ -49,7 +50,23 @@ func (h *GeminiCLIAPIHandler) Models() []map[string]any {
 // CLIHandler handles CLI-specific requests for Gemini API operations.
 // It restricts access to localhost only and routes requests to appropriate internal handlers.
 func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
-	if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
+	if h.Cfg == nil || !h.Cfg.EnableGeminiCLIEndpoint {
+		c.JSON(http.StatusForbidden, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Gemini CLI endpoint is disabled",
+				Type:    "forbidden",
+			},
+		})
+		return
+	}
+
+	requestHost := c.Request.Host
+	requestHostname := requestHost
+	if hostname, _, errSplitHostPort := net.SplitHostPort(requestHost); errSplitHostPort == nil {
+		requestHostname = hostname
+	}
+
+	if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") || requestHostname != "127.0.0.1" {
 		c.JSON(http.StatusForbidden, handlers.ErrorResponse{
 			Error: handlers.ErrorDetail{
 				Message: "CLI reply only allow local access",