Merge branch 'router-for-me:main' into main

fix(executor): remove commented-out code in QwenExecutor
Merge pull request #2520 from Arronlong/main
2026-04-05 20:23:01 +00:00 · 2026-04-04 02:18:06 +08:00 · 2026-04-04 02:14:48 +08:00 · 2026-04-04 02:13:09 +08:00 · 2026-04-03 23:55:50 +08:00 · 2026-04-03 23:45:38 +08:00
10 changed files with 586 additions and 77 deletions
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -105,6 +105,10 @@ routing:
 # When true, enable authentication for the WebSocket API (/v1/ws).
 ws-auth: false

+# When true, enable Gemini CLI internal endpoints (/v1internal:*).
+# Default is false for safety.
+enable-gemini-cli-endpoint: false
+
 # When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
 nonstream-keepalive-interval: 0

--- a/internal/config/sdk_config.go
+++ b/internal/config/sdk_config.go
@@ -9,6 +9,10 @@ type SDKConfig struct {
 	// ProxyURL is the URL of an optional proxy server to use for outbound requests.
 	ProxyURL string `yaml:"proxy-url" json:"proxy-url"`

+	// EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled.
+	// Default is false for safety; when false, /v1internal:* requests are rejected.
+	EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"`
+
 	// ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview")
 	// to target prefixed credentials. When false, unprefixed model requests may use prefixed
 	// credentials as well.
--- a/internal/registry/models/models.json
+++ b/internal/registry/models/models.json
@@ -280,6 +280,7 @@
        "dynamic_allowed": true,
        "levels": [
          "low",
+          "medium",
          "high"
        ]
      }
@@ -554,6 +555,7 @@
        "dynamic_allowed": true,
        "levels": [
          "low",
+          "medium",
          "high"
        ]
      }
@@ -610,6 +612,8 @@
        "dynamic_allowed": true,
        "levels": [
          "minimal",
+          "low",
+          "medium",
          "high"
        ]
      }
@@ -838,6 +842,7 @@
        "dynamic_allowed": true,
        "levels": [
          "low",
+          "medium",
          "high"
        ]
      }
@@ -896,6 +901,8 @@
        "dynamic_allowed": true,
        "levels": [
          "minimal",
+          "low",
+          "medium",
          "high"
        ]
      }
@@ -1070,6 +1077,8 @@
        "dynamic_allowed": true,
        "levels": [
          "minimal",
+          "low",
+          "medium",
          "high"
        ]
      }
@@ -1371,6 +1380,75 @@
          "xhigh"
        ]
      }
+    },
+    {
+      "id": "gpt-5.3-codex",
+      "object": "model",
+      "created": 1770307200,
+      "owned_by": "openai",
+      "type": "openai",
+      "display_name": "GPT 5.3 Codex",
+      "version": "gpt-5.3",
+      "description": "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.",
+      "context_length": 400000,
+      "max_completion_tokens": 128000,
+      "supported_parameters": [
+        "tools"
+      ],
+      "thinking": {
+        "levels": [
+          "low",
+          "medium",
+          "high",
+          "xhigh"
+        ]
+      }
+    },
+    {
+      "id": "gpt-5.4",
+      "object": "model",
+      "created": 1772668800,
+      "owned_by": "openai",
+      "type": "openai",
+      "display_name": "GPT 5.4",
+      "version": "gpt-5.4",
+      "description": "Stable version of GPT 5.4",
+      "context_length": 1050000,
+      "max_completion_tokens": 128000,
+      "supported_parameters": [
+        "tools"
+      ],
+      "thinking": {
+        "levels": [
+          "low",
+          "medium",
+          "high",
+          "xhigh"
+        ]
+      }
+    },
+    {
+      "id": "gpt-5.4-mini",
+      "object": "model",
+      "created": 1773705600,
+      "owned_by": "openai",
+      "type": "openai",
+      "display_name": "GPT 5.4 Mini",
+      "version": "gpt-5.4-mini",
+      "description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
+      "context_length": 400000,
+      "max_completion_tokens": 128000,
+      "supported_parameters": [
+        "tools"
+      ],
+      "thinking": {
+        "levels": [
+          "low",
+          "medium",
+          "high",
+          "xhigh"
+        ]
+      }
    }
  ],
  "codex-team": [
@@ -1623,6 +1701,29 @@
          "xhigh"
        ]
      }
+    },
+    {
+      "id": "gpt-5.4-mini",
+      "object": "model",
+      "created": 1773705600,
+      "owned_by": "openai",
+      "type": "openai",
+      "display_name": "GPT 5.4 Mini",
+      "version": "gpt-5.4-mini",
+      "description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
+      "context_length": 400000,
+      "max_completion_tokens": 128000,
+      "supported_parameters": [
+        "tools"
+      ],
+      "thinking": {
+        "levels": [
+          "low",
+          "medium",
+          "high",
+          "xhigh"
+        ]
+      }
    }
  ],
  "codex-plus": [
@@ -1898,6 +1999,29 @@
          "xhigh"
        ]
      }
+    },
+    {
+      "id": "gpt-5.4-mini",
+      "object": "model",
+      "created": 1773705600,
+      "owned_by": "openai",
+      "type": "openai",
+      "display_name": "GPT 5.4 Mini",
+      "version": "gpt-5.4-mini",
+      "description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
+      "context_length": 400000,
+      "max_completion_tokens": 128000,
+      "supported_parameters": [
+        "tools"
+      ],
+      "thinking": {
+        "levels": [
+          "low",
+          "medium",
+          "high",
+          "xhigh"
+        ]
+      }
    }
  ],
  "codex-pro": [
@@ -2173,55 +2297,40 @@
          "xhigh"
        ]
      }
+    },
+    {
+      "id": "gpt-5.4-mini",
+      "object": "model",
+      "created": 1773705600,
+      "owned_by": "openai",
+      "type": "openai",
+      "display_name": "GPT 5.4 Mini",
+      "version": "gpt-5.4-mini",
+      "description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
+      "context_length": 400000,
+      "max_completion_tokens": 128000,
+      "supported_parameters": [
+        "tools"
+      ],
+      "thinking": {
+        "levels": [
+          "low",
+          "medium",
+          "high",
+          "xhigh"
+        ]
+      }
    }
  ],
  "qwen": [
-    {
-      "id": "qwen3-coder-plus",
-      "object": "model",
-      "created": 1753228800,
-      "owned_by": "qwen",
-      "type": "qwen",
-      "display_name": "Qwen3 Coder Plus",
-      "version": "3.0",
-      "description": "Advanced code generation and understanding model",
-      "context_length": 32768,
-      "max_completion_tokens": 8192,
-      "supported_parameters": [
-        "temperature",
-        "top_p",
-        "max_tokens",
-        "stream",
-        "stop"
-      ]
-    },
-    {
-      "id": "qwen3-coder-flash",
-      "object": "model",
-      "created": 1753228800,
-      "owned_by": "qwen",
-      "type": "qwen",
-      "display_name": "Qwen3 Coder Flash",
-      "version": "3.0",
-      "description": "Fast code generation model",
-      "context_length": 8192,
-      "max_completion_tokens": 2048,
-      "supported_parameters": [
-        "temperature",
-        "top_p",
-        "max_tokens",
-        "stream",
-        "stop"
-      ]
-    },
    {
      "id": "coder-model",
      "object": "model",
      "created": 1771171200,
      "owned_by": "qwen",
      "type": "qwen",
-      "display_name": "Qwen 3.5 Plus",
-      "version": "3.5",
+      "display_name": "Qwen 3.6 Plus",
+      "version": "3.6",
      "description": "efficient hybrid model with leading coding performance",
      "context_length": 1048576,
      "max_completion_tokens": 65536,
@@ -2232,25 +2341,6 @@
        "stream",
        "stop"
      ]
-    },
-    {
-      "id": "vision-model",
-      "object": "model",
-      "created": 1758672000,
-      "owned_by": "qwen",
-      "type": "qwen",
-      "display_name": "Qwen3 Vision Model",
-      "version": "3.0",
-      "description": "Vision model model",
-      "context_length": 32768,
-      "max_completion_tokens": 2048,
-      "supported_parameters": [
-        "temperature",
-        "top_p",
-        "max_tokens",
-        "stream",
-        "stop"
-      ]
    }
  ],
  "iflow": [
@@ -2639,11 +2729,12 @@
      "context_length": 1048576,
      "max_completion_tokens": 65535,
      "thinking": {
-        "min": 128,
-        "max": 32768,
+        "min": 1,
+        "max": 65535,
        "dynamic_allowed": true,
        "levels": [
          "low",
+          "medium",
          "high"
        ]
      }
@@ -2659,11 +2750,12 @@
      "context_length": 1048576,
      "max_completion_tokens": 65535,
      "thinking": {
-        "min": 128,
-        "max": 32768,
+        "min": 1,
+        "max": 65535,
        "dynamic_allowed": true,
        "levels": [
          "low",
+          "medium",
          "high"
        ]
      }
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -137,6 +137,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r

 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
+	body = normalizeClaudeTemperatureForThinking(body)

 	// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
 	if countCacheControls(body) == 0 {
@@ -307,6 +308,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A

 	// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
 	body = disableThinkingIfToolChoiceForced(body)
+	body = normalizeClaudeTemperatureForThinking(body)

 	// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
 	if countCacheControls(body) == 0 {
@@ -651,6 +653,25 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
 	return body
 }

+// normalizeClaudeTemperatureForThinking keeps Anthropic message requests valid when
+// thinking is enabled. Anthropic rejects temperatures other than 1 when
+// thinking.type is enabled/adaptive/auto.
+func normalizeClaudeTemperatureForThinking(body []byte) []byte {
+	if !gjson.GetBytes(body, "temperature").Exists() {
+		return body
+	}
+
+	thinkingType := strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "thinking.type").String()))
+	switch thinkingType {
+	case "enabled", "adaptive", "auto":
+		if temp := gjson.GetBytes(body, "temperature"); temp.Exists() && temp.Type == gjson.Number && temp.Float() == 1 {
+			return body
+		}
+		body, _ = sjson.SetBytes(body, "temperature", 1)
+	}
+	return body
+}
+
 type compositeReadCloser struct {
 	io.Reader
 	closers []func() error
--- a/internal/runtime/executor/claude_executor_test.go
+++ b/internal/runtime/executor/claude_executor_test.go
@@ -1833,3 +1833,43 @@ func TestApplyCloaking_PreservesConfiguredStrictModeAndSensitiveWordsWhenModeOmi
 		t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got)
 	}
 }
+
+func TestNormalizeClaudeTemperatureForThinking_AdaptiveCoercesToOne(t *testing.T) {
+	payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"}}`)
+	out := normalizeClaudeTemperatureForThinking(payload)
+
+	if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
+		t.Fatalf("temperature = %v, want 1", got)
+	}
+}
+
+func TestNormalizeClaudeTemperatureForThinking_EnabledCoercesToOne(t *testing.T) {
+	payload := []byte(`{"temperature":0.2,"thinking":{"type":"enabled","budget_tokens":2048}}`)
+	out := normalizeClaudeTemperatureForThinking(payload)
+
+	if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
+		t.Fatalf("temperature = %v, want 1", got)
+	}
+}
+
+func TestNormalizeClaudeTemperatureForThinking_NoThinkingLeavesTemperatureAlone(t *testing.T) {
+	payload := []byte(`{"temperature":0,"messages":[{"role":"user","content":"hi"}]}`)
+	out := normalizeClaudeTemperatureForThinking(payload)
+
+	if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
+		t.Fatalf("temperature = %v, want 0", got)
+	}
+}
+
+func TestNormalizeClaudeTemperatureForThinking_AfterForcedToolChoiceKeepsOriginalTemperature(t *testing.T) {
+	payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"},"tool_choice":{"type":"any"}}`)
+	out := disableThinkingIfToolChoiceForced(payload)
+	out = normalizeClaudeTemperatureForThinking(out)
+
+	if gjson.GetBytes(out, "thinking").Exists() {
+		t.Fatalf("thinking should be removed when tool_choice forces tool use")
+	}
+	if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
+		t.Fatalf("temperature = %v, want 0", got)
+	}
+}
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -30,6 +30,8 @@ const (
 	qwenRateLimitWindow = time.Minute // sliding window duration
 )

+var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
+
 // qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
 var qwenBeijingLoc = func() *time.Location {
 	loc, err := time.LoadLocation("Asia/Shanghai")
@@ -170,6 +172,36 @@ func timeUntilNextDay() time.Duration {
 	return tomorrow.Sub(now)
 }

+// ensureQwenSystemMessage prepends a default system message if none exists in "messages".
+func ensureQwenSystemMessage(payload []byte) ([]byte, error) {
+	messages := gjson.GetBytes(payload, "messages")
+	if messages.Exists() && messages.IsArray() {
+		var buf bytes.Buffer
+		buf.WriteByte('[')
+		buf.Write(qwenDefaultSystemMessage)
+		for _, msg := range messages.Array() {
+			buf.WriteByte(',')
+			buf.WriteString(msg.Raw)
+		}
+		buf.WriteByte(']')
+		updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
+		if errSet != nil {
+			return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
+		}
+		return updated, nil
+	}
+
+	var buf bytes.Buffer
+	buf.WriteByte('[')
+	buf.Write(qwenDefaultSystemMessage)
+	buf.WriteByte(']')
+	updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
+	if errSet != nil {
+		return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
+	}
+	return updated, nil
+}
+
 // QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
 // If access token is unavailable, it falls back to legacy via ClientAdapter.
 type QwenExecutor struct {
@@ -251,6 +283,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req

 	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
 	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, err = ensureQwenSystemMessage(body)
+	if err != nil {
+		return resp, err
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -357,15 +393,19 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		return nil, err
 	}

-	toolsResult := gjson.GetBytes(body, "tools")
+	// toolsResult := gjson.GetBytes(body, "tools")
 	// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
 	// This will have no real consequences. It's just to scare Qwen3.
-	if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
-		body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
-	}
+	// if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
+	// 	body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
+	// }
 	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
 	requestedModel := helps.PayloadRequestedModel(opts, req.Model)
 	body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
+	body, err = ensureQwenSystemMessage(body)
+	if err != nil {
+		return nil, err
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
--- a/sdk/api/handlers/gemini/gemini-cli_handlers.go
+++ b/sdk/api/handlers/gemini/gemini-cli_handlers.go
@@ -9,6 +9,7 @@ import (
 	"context"
 	"fmt"
 	"io"
+	"net"
 	"net/http"
 	"strings"
 	"time"
@@ -49,7 +50,23 @@ func (h *GeminiCLIAPIHandler) Models() []map[string]any {
 // CLIHandler handles CLI-specific requests for Gemini API operations.
 // It restricts access to localhost only and routes requests to appropriate internal handlers.
 func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
-	if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
+	if h.Cfg == nil || !h.Cfg.EnableGeminiCLIEndpoint {
+		c.JSON(http.StatusForbidden, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Gemini CLI endpoint is disabled",
+				Type:    "forbidden",
+			},
+		})
+		return
+	}
+
+	requestHost := c.Request.Host
+	requestHostname := requestHost
+	if hostname, _, errSplitHostPort := net.SplitHostPort(requestHost); errSplitHostPort == nil {
+		requestHostname = hostname
+	}
+
+	if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") || requestHostname != "127.0.0.1" {
 		c.JSON(http.StatusForbidden, handlers.ErrorResponse{
 			Error: handlers.ErrorDetail{
 				Message: "CLI reply only allow local access",
--- a/sdk/api/handlers/openai/openai_responses_websocket.go
+++ b/sdk/api/handlers/openai/openai_responses_websocket.go
@@ -379,7 +379,7 @@ func shouldReplaceWebsocketTranscript(rawJSON []byte, nextInput gjson.Result) bo

 	for _, item := range nextInput.Array() {
 		switch strings.TrimSpace(item.Get("type").String()) {
-		case "function_call":
+		case "function_call", "custom_tool_call":
 			return true
 		case "message":
 			role := strings.TrimSpace(item.Get("role").String())
@@ -431,7 +431,7 @@ func dedupeFunctionCallsByCallID(rawArray string) (string, error) {
 			continue
 		}
 		itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
-		if itemType == "function_call" {
+		if isResponsesToolCallType(itemType) {
 			callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
 			if callID != "" {
 				if _, ok := seenCallIDs[callID]; ok {
--- a/sdk/api/handlers/openai/openai_responses_websocket_test.go
+++ b/sdk/api/handlers/openai/openai_responses_websocket_test.go
@@ -520,6 +520,92 @@ func TestRepairResponsesWebsocketToolCallsDropsOrphanOutputWhenCallMissing(t *te
 	}
 }

+func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolOutput(t *testing.T) {
+	cache := newWebsocketToolOutputCache(time.Minute, 10)
+	sessionKey := "session-1"
+
+	cacheWarm := []byte(`{"previous_response_id":"resp-1","input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"}]}`)
+	warmed := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, cacheWarm)
+	if gjson.GetBytes(warmed, "input.0.call_id").String() != "call-1" {
+		t.Fatalf("expected warmup output to remain")
+	}
+
+	raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
+	repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
+
+	input := gjson.GetBytes(repaired, "input").Array()
+	if len(input) != 3 {
+		t.Fatalf("repaired input len = %d, want 3", len(input))
+	}
+	if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
+		t.Fatalf("unexpected first item: %s", input[0].Raw)
+	}
+	if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
+		t.Fatalf("missing inserted output: %s", input[1].Raw)
+	}
+	if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
+		t.Fatalf("unexpected trailing item: %s", input[2].Raw)
+	}
+}
+
+func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolCall(t *testing.T) {
+	cache := newWebsocketToolOutputCache(time.Minute, 10)
+	sessionKey := "session-1"
+
+	raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
+	repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
+
+	input := gjson.GetBytes(repaired, "input").Array()
+	if len(input) != 1 {
+		t.Fatalf("repaired input len = %d, want 1", len(input))
+	}
+	if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
+		t.Fatalf("unexpected remaining item: %s", input[0].Raw)
+	}
+}
+
+func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolCallForOrphanOutput(t *testing.T) {
+	outputCache := newWebsocketToolOutputCache(time.Minute, 10)
+	callCache := newWebsocketToolOutputCache(time.Minute, 10)
+	sessionKey := "session-1"
+
+	callCache.record(sessionKey, "call-1", []byte(`{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"}`))
+
+	raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
+	repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
+
+	input := gjson.GetBytes(repaired, "input").Array()
+	if len(input) != 3 {
+		t.Fatalf("repaired input len = %d, want 3", len(input))
+	}
+	if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
+		t.Fatalf("missing inserted call: %s", input[0].Raw)
+	}
+	if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
+		t.Fatalf("unexpected output item: %s", input[1].Raw)
+	}
+	if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
+		t.Fatalf("unexpected trailing item: %s", input[2].Raw)
+	}
+}
+
+func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolOutputWhenCallMissing(t *testing.T) {
+	outputCache := newWebsocketToolOutputCache(time.Minute, 10)
+	callCache := newWebsocketToolOutputCache(time.Minute, 10)
+	sessionKey := "session-1"
+
+	raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
+	repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
+
+	input := gjson.GetBytes(repaired, "input").Array()
+	if len(input) != 1 {
+		t.Fatalf("repaired input len = %d, want 1", len(input))
+	}
+	if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
+		t.Fatalf("unexpected remaining item: %s", input[0].Raw)
+	}
+}
+
 func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
 	cache := newWebsocketToolOutputCache(time.Minute, 10)
 	sessionKey := "session-1"
@@ -536,6 +622,38 @@ func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
 	}
 }

+func TestRecordResponsesWebsocketCustomToolCallsFromCompletedPayloadWithCache(t *testing.T) {
+	cache := newWebsocketToolOutputCache(time.Minute, 10)
+	sessionKey := "session-1"
+
+	payload := []byte(`{"type":"response.completed","response":{"id":"resp-1","output":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}]}}`)
+	recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
+
+	cached, ok := cache.get(sessionKey, "call-1")
+	if !ok {
+		t.Fatalf("expected cached custom tool call")
+	}
+	if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
+		t.Fatalf("unexpected cached custom tool call: %s", cached)
+	}
+}
+
+func TestRecordResponsesWebsocketCustomToolCallsFromOutputItemDoneWithCache(t *testing.T) {
+	cache := newWebsocketToolOutputCache(time.Minute, 10)
+	sessionKey := "session-1"
+
+	payload := []byte(`{"type":"response.output_item.done","item":{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}}`)
+	recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
+
+	cached, ok := cache.get(sessionKey, "call-1")
+	if !ok {
+		t.Fatalf("expected cached custom tool call")
+	}
+	if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
+		t.Fatalf("unexpected cached custom tool call: %s", cached)
+	}
+}
+
 func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
 	gin.SetMode(gin.TestMode)

@@ -1023,6 +1141,161 @@ func TestNormalizeResponsesWebsocketRequestDropsDuplicateFunctionCallsByCallID(t
 	}
 }

+func TestNormalizeResponsesWebsocketRequestTreatsCustomToolTranscriptReplacementAsReset(t *testing.T) {
+	lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"message","id":"msg-1"},{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"},{"type":"message","id":"assistant-1","role":"assistant"}]}`)
+	lastResponseOutput := []byte(`[
+		{"type":"message","id":"assistant-1","role":"assistant"}
+	]`)
+	raw := []byte(`{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`)
+
+	normalized, next, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
+	if errMsg != nil {
+		t.Fatalf("unexpected error: %v", errMsg.Error)
+	}
+	if gjson.GetBytes(normalized, "previous_response_id").Exists() {
+		t.Fatalf("previous_response_id must not exist in transcript replacement mode")
+	}
+	items := gjson.GetBytes(normalized, "input").Array()
+	if len(items) != 3 {
+		t.Fatalf("replacement input len = %d, want 3: %s", len(items), normalized)
+	}
+	if items[0].Get("id").String() != "ctc-compact" ||
+		items[1].Get("id").String() != "tool-out-compact" ||
+		items[2].Get("id").String() != "msg-2" {
+		t.Fatalf("replacement transcript was not preserved as-is: %s", normalized)
+	}
+	if !bytes.Equal(next, normalized) {
+		t.Fatalf("next request snapshot should match replacement request")
+	}
+}
+
+func TestNormalizeResponsesWebsocketRequestDropsDuplicateCustomToolCallsByCallID(t *testing.T) {
+	lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"}]}`)
+	lastResponseOutput := []byte(`[
+		{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"}
+	]`)
+	raw := []byte(`{"type":"response.create","input":[{"type":"message","id":"msg-2"}]}`)
+
+	normalized, _, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
+	if errMsg != nil {
+		t.Fatalf("unexpected error: %v", errMsg.Error)
+	}
+
+	items := gjson.GetBytes(normalized, "input").Array()
+	if len(items) != 3 {
+		t.Fatalf("merged input len = %d, want 3: %s", len(items), normalized)
+	}
+	if items[0].Get("id").String() != "ctc-1" ||
+		items[1].Get("id").String() != "tool-out-1" ||
+		items[2].Get("id").String() != "msg-2" {
+		t.Fatalf("unexpected merged input order: %s", normalized)
+	}
+}
+
+func TestResponsesWebsocketCompactionResetsTurnStateOnCustomToolTranscriptReplacement(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	executor := &websocketCompactionCaptureExecutor{}
+	manager := coreauth.NewManager(nil, nil, nil)
+	manager.RegisterExecutor(executor)
+	auth := &coreauth.Auth{ID: "auth-sse", Provider: executor.Identifier(), Status: coreauth.StatusActive}
+	if _, err := manager.Register(context.Background(), auth); err != nil {
+		t.Fatalf("Register auth: %v", err)
+	}
+	registry.GetGlobalRegistry().RegisterClient(auth.ID, auth.Provider, []*registry.ModelInfo{{ID: "test-model"}})
+	t.Cleanup(func() {
+		registry.GetGlobalRegistry().UnregisterClient(auth.ID)
+	})
+
+	base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager)
+	h := NewOpenAIResponsesAPIHandler(base)
+	router := gin.New()
+	router.GET("/v1/responses/ws", h.ResponsesWebsocket)
+	router.POST("/v1/responses/compact", h.Compact)
+
+	server := httptest.NewServer(router)
+	defer server.Close()
+
+	wsURL := "ws" + strings.TrimPrefix(server.URL, "http") + "/v1/responses/ws"
+	conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
+	if err != nil {
+		t.Fatalf("dial websocket: %v", err)
+	}
+	defer func() {
+		if errClose := conn.Close(); errClose != nil {
+			t.Fatalf("close websocket: %v", errClose)
+		}
+	}()
+
+	requests := []string{
+		`{"type":"response.create","model":"test-model","input":[{"type":"message","id":"msg-1"}]}`,
+		`{"type":"response.create","input":[{"type":"custom_tool_call_output","call_id":"call-1","id":"tool-out-1"}]}`,
+	}
+	for i := range requests {
+		if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(requests[i])); errWrite != nil {
+			t.Fatalf("write websocket message %d: %v", i+1, errWrite)
+		}
+		_, payload, errReadMessage := conn.ReadMessage()
+		if errReadMessage != nil {
+			t.Fatalf("read websocket message %d: %v", i+1, errReadMessage)
+		}
+		if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
+			t.Fatalf("message %d payload type = %s, want %s", i+1, got, wsEventTypeCompleted)
+		}
+	}
+
+	compactResp, errPost := server.Client().Post(
+		server.URL+"/v1/responses/compact",
+		"application/json",
+		strings.NewReader(`{"model":"test-model","input":[{"type":"message","id":"summary-1"}]}`),
+	)
+	if errPost != nil {
+		t.Fatalf("compact request failed: %v", errPost)
+	}
+	if errClose := compactResp.Body.Close(); errClose != nil {
+		t.Fatalf("close compact response body: %v", errClose)
+	}
+	if compactResp.StatusCode != http.StatusOK {
+		t.Fatalf("compact status = %d, want %d", compactResp.StatusCode, http.StatusOK)
+	}
+
+	postCompact := `{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`
+	if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(postCompact)); errWrite != nil {
+		t.Fatalf("write post-compact websocket message: %v", errWrite)
+	}
+	_, payload, errReadMessage := conn.ReadMessage()
+	if errReadMessage != nil {
+		t.Fatalf("read post-compact websocket message: %v", errReadMessage)
+	}
+	if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
+		t.Fatalf("post-compact payload type = %s, want %s", got, wsEventTypeCompleted)
+	}
+
+	executor.mu.Lock()
+	defer executor.mu.Unlock()
+
+	if executor.compactPayload == nil {
+		t.Fatalf("compact payload was not captured")
+	}
+	if len(executor.streamPayloads) != 3 {
+		t.Fatalf("stream payload count = %d, want 3", len(executor.streamPayloads))
+	}
+
+	merged := executor.streamPayloads[2]
+	items := gjson.GetBytes(merged, "input").Array()
+	if len(items) != 3 {
+		t.Fatalf("merged input len = %d, want 3: %s", len(items), merged)
+	}
+	if items[0].Get("id").String() != "ctc-compact" ||
+		items[1].Get("id").String() != "tool-out-compact" ||
+		items[2].Get("id").String() != "msg-2" {
+		t.Fatalf("unexpected post-compact input order: %s", merged)
+	}
+	if items[0].Get("call_id").String() != "call-1" {
+		t.Fatalf("post-compact custom tool call id = %s, want call-1", items[0].Get("call_id").String())
+	}
+}
+
 func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) {
 	gin.SetMode(gin.TestMode)

--- a/sdk/api/handlers/openai/openai_responses_websocket_toolcall_repair.go
+++ b/sdk/api/handlers/openai/openai_responses_websocket_toolcall_repair.go
@@ -266,15 +266,15 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
 			continue
 		}
 		itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
-		switch itemType {
-		case "function_call_output":
+		switch {
+		case isResponsesToolCallOutputType(itemType):
 			callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
 			if callID == "" {
 				continue
 			}
 			outputPresent[callID] = struct{}{}
 			outputCache.record(sessionKey, callID, item)
-		case "function_call":
+		case isResponsesToolCallType(itemType):
 			callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
 			if callID == "" {
 				continue
@@ -293,7 +293,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
 			continue
 		}
 		itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
-		if itemType == "function_call_output" {
+		if isResponsesToolCallOutputType(itemType) {
 			callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
 			if callID == "" {
 				// Upstream rejects tool outputs without a call_id; drop it.
@@ -325,7 +325,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
 			// Drop orphaned function_call_output items; upstream rejects transcripts with missing calls.
 			continue
 		}
-		if itemType != "function_call" {
+		if !isResponsesToolCallType(itemType) {
 			filtered = append(filtered, item)
 			continue
 		}
@@ -376,7 +376,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
 			return
 		}
 		for _, item := range output.Array() {
-			if strings.TrimSpace(item.Get("type").String()) != "function_call" {
+			if !isResponsesToolCallType(item.Get("type").String()) {
 				continue
 			}
 			callID := strings.TrimSpace(item.Get("call_id").String())
@@ -390,7 +390,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
 		if !item.Exists() || !item.IsObject() {
 			return
 		}
-		if strings.TrimSpace(item.Get("type").String()) != "function_call" {
+		if !isResponsesToolCallType(item.Get("type").String()) {
 			return
 		}
 		callID := strings.TrimSpace(item.Get("call_id").String())
@@ -400,3 +400,21 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
 		cache.record(sessionKey, callID, json.RawMessage(item.Raw))
 	}
 }
+
+func isResponsesToolCallType(itemType string) bool {
+	switch strings.TrimSpace(itemType) {
+	case "function_call", "custom_tool_call":
+		return true
+	default:
+		return false
+	}
+}
+
+func isResponsesToolCallOutputType(itemType string) bool {
+	switch strings.TrimSpace(itemType) {
+	case "function_call_output", "custom_tool_call_output":
+		return true
+	default:
+		return false
+	}
+}
Author	SHA1	Message	Date
Luis Pater	e7a66ae504	Merge branch 'router-for-me:main' into main	2026-04-04 02:18:06 +08:00
Luis Pater	754b126944	fix(executor): remove commented-out code in QwenExecutor	2026-04-04 02:14:48 +08:00
Luis Pater	ae37ccffbf	Merge pull request #2520 from Arronlong/main fix:qwen invalid_parameter_error	2026-04-04 02:13:09 +08:00
Luis Pater	42c062bb5b	Merge pull request #2509 from adamhelfgott/fix-claude-thinking-temperature Normalize Claude temperature when thinking is enabled	2026-04-03 23:55:50 +08:00
Luis Pater	f389667ec3	Merge pull request #2513 from lonr-6/codex/fix-ws-custom-tool-repair-v2 fix: repair responses websocket custom tool call pairing	2026-04-03 23:45:38 +08:00
Arronlong	29dba0399b	Comment out system message check in Qwen executor fix qwen invalid_parameter_error	2026-04-03 23:07:33 +08:00
Luis Pater	a824e7cd0b	feat(models): add GPT-5.3, GPT-5.4, and GPT-5.4-mini with enhanced "thinking" levels	2026-04-03 23:05:10 +08:00
Luis Pater	140faef7dc	Merge branch 'router-for-me:main' into main	2026-04-03 21:48:23 +08:00
Luis Pater	adb580b344	feat(security): add configuration to toggle Gemini CLI endpoint access Closes: #2445	2026-04-03 21:46:49 +08:00
Luis Pater	06405f2129	fix(security): enforce stricter localhost validation for GeminiCLIAPIHandler Closes: #2445	2026-04-03 21:22:03 +08:00
Kai Wang	d1fd2c4ad4	fix: repair websocket custom tool calls	2026-04-03 17:11:44 +08:00
Kai Wang	b6c6379bfa	fix: repair websocket custom tool calls	2026-04-03 17:11:42 +08:00
Kai Wang	8f0e66b72e	fix: repair websocket custom tool calls	2026-04-03 17:11:41 +08:00
Adam Helfgott	f63cf6ff7a	Normalize Claude temperature for thinking	2026-04-03 03:45:51 -04:00
Luis Pater	d2419ed49d	feat(executor): ensure default system message in QwenExecutor payload	2026-04-03 11:18:48 +08:00