Merge pull request #2491 from mpfo0106/feature/claude-code-safe-alignment-sentinels

test(claude): add compatibility sentinels and centralize builtin fallback handling
This commit is contained in:
Luis Pater
2026-04-06 09:27:08 +08:00
committed by GitHub
9 changed files with 243 additions and 6 deletions

View File

@@ -951,12 +951,9 @@ func applyClaudeToolPrefix(body []byte, prefix string) []byte {
return body
}
// Collect built-in tool names (those with a non-empty "type" field) so we can
// skip them consistently in both tools and message history.
builtinTools := map[string]bool{}
for _, name := range []string{"web_search", "code_execution", "text_editor", "computer"} {
builtinTools[name] = true
}
// Collect built-in tool names from the authoritative fallback seed list and
// augment it with any typed built-ins present in the current request body.
builtinTools := helps.AugmentClaudeBuiltinToolRegistry(body, nil)
if tools := gjson.GetBytes(body, "tools"); tools.Exists() && tools.IsArray() {
tools.ForEach(func(index, tool gjson.Result) bool {

View File

@@ -739,6 +739,35 @@ func TestApplyClaudeToolPrefix_ToolChoiceBuiltin(t *testing.T) {
}
}
// TestApplyClaudeToolPrefix_KnownFallbackBuiltinsRemainUnprefixed checks, for
// each of the four fallback built-in names, that applyClaudeToolPrefix leaves
// the name untouched in tool_choice, in an assistant tool_use block, and in
// both top-level and nested tool_reference blocks. The request's "tools" array
// only declares the custom "Read" tool, which must still receive the prefix.
func TestApplyClaudeToolPrefix_KnownFallbackBuiltinsRemainUnprefixed(t *testing.T) {
	fallbackNames := []string{"web_search", "code_execution", "text_editor", "computer"}
	for i := range fallbackNames {
		name := fallbackNames[i]
		t.Run(name, func(t *testing.T) {
			// The built-in name is referenced from tool_choice and the message
			// history only; it is never declared in "tools".
			request := fmt.Sprintf(`{
"tools":[{"name":"Read"}],
"tool_choice":{"type":"tool","name":%q},
"messages":[{"role":"assistant","content":[{"type":"tool_use","name":%q,"id":"toolu_1","input":{}},{"type":"tool_reference","tool_name":%q},{"type":"tool_result","tool_use_id":"toolu_1","content":[{"type":"tool_reference","tool_name":%q}]}]}]
}`, name, name, name, name)
			prefixed := applyClaudeToolPrefix([]byte(request), "proxy_")

			toolChoice := gjson.GetBytes(prefixed, "tool_choice.name").String()
			if toolChoice != name {
				t.Fatalf("tool_choice.name = %q, want %q", toolChoice, name)
			}

			toolUse := gjson.GetBytes(prefixed, "messages.0.content.0.name").String()
			if toolUse != name {
				t.Fatalf("messages.0.content.0.name = %q, want %q", toolUse, name)
			}

			reference := gjson.GetBytes(prefixed, "messages.0.content.1.tool_name").String()
			if reference != name {
				t.Fatalf("messages.0.content.1.tool_name = %q, want %q", reference, name)
			}

			nestedReference := gjson.GetBytes(prefixed, "messages.0.content.2.content.0.tool_name").String()
			if nestedReference != name {
				t.Fatalf("messages.0.content.2.content.0.tool_name = %q, want %q", nestedReference, name)
			}

			// The untyped custom tool is the only entry expected to be renamed.
			const wantCustom = "proxy_Read"
			customTool := gjson.GetBytes(prefixed, "tools.0.name").String()
			if customTool != wantCustom {
				t.Fatalf("tools.0.name = %q, want %q", customTool, wantCustom)
			}
		})
	}
}
func TestStripClaudeToolPrefixFromResponse(t *testing.T) {
input := []byte(`{"content":[{"type":"tool_use","name":"proxy_alpha","id":"t1","input":{}},{"type":"tool_use","name":"bravo","id":"t2","input":{}}]}`)
out := stripClaudeToolPrefixFromResponse(input, "proxy_")

View File

@@ -0,0 +1,38 @@
package helps
import "github.com/tidwall/gjson"
// defaultClaudeBuiltinToolNames lists the tool names that seed every registry
// created by newClaudeBuiltinToolRegistry.
var defaultClaudeBuiltinToolNames = []string{
"web_search",
"code_execution",
"text_editor",
"computer",
}
// newClaudeBuiltinToolRegistry returns a freshly allocated set containing every
// name in defaultClaudeBuiltinToolNames. Each call builds a new map, so a
// caller may add entries without affecting registries handed out earlier.
func newClaudeBuiltinToolRegistry() map[string]bool {
	seeded := make(map[string]bool, len(defaultClaudeBuiltinToolNames))
	for i := range defaultClaudeBuiltinToolNames {
		seeded[defaultClaudeBuiltinToolNames[i]] = true
	}
	return seeded
}
// AugmentClaudeBuiltinToolRegistry records in registry the name of every entry
// of the body's top-level "tools" array that carries both a non-empty "type"
// and a non-empty "name", then returns the registry.
//
// A nil registry is replaced by a new one seeded from
// defaultClaudeBuiltinToolNames; a non-nil registry is updated in place and
// returned. If body has no "tools" array the registry is returned unchanged.
func AugmentClaudeBuiltinToolRegistry(body []byte, registry map[string]bool) map[string]bool {
	if registry == nil {
		registry = newClaudeBuiltinToolRegistry()
	}

	toolList := gjson.GetBytes(body, "tools")
	if !(toolList.Exists() && toolList.IsArray()) {
		return registry
	}

	toolList.ForEach(func(_, entry gjson.Result) bool {
		// Entries without a "type" are skipped; unnamed typed entries have
		// nothing to register.
		if entry.Get("type").String() != "" {
			if toolName := entry.Get("name").String(); toolName != "" {
				registry[toolName] = true
			}
		}
		return true
	})
	return registry
}

View File

@@ -0,0 +1,32 @@
package helps
import "testing"
// TestClaudeBuiltinToolRegistry_DefaultSeedFallback verifies that calling
// AugmentClaudeBuiltinToolRegistry with neither a body nor a registry yields a
// registry containing every name in defaultClaudeBuiltinToolNames.
func TestClaudeBuiltinToolRegistry_DefaultSeedFallback(t *testing.T) {
	seeded := AugmentClaudeBuiltinToolRegistry(nil, nil)
	for i := range defaultClaudeBuiltinToolNames {
		want := defaultClaudeBuiltinToolNames[i]
		if present := seeded[want]; !present {
			t.Fatalf("default builtin %q missing from fallback registry", want)
		}
	}
}
// TestClaudeBuiltinToolRegistry_AugmentsTypedBuiltinsFromBody verifies that
// typed tools found in the request body are present in the registry (both a
// name that is already a default seed and a previously unknown one), while an
// untyped tool is not.
func TestClaudeBuiltinToolRegistry_AugmentsTypedBuiltinsFromBody(t *testing.T) {
	const requestBody = `{
"tools": [
{"type": "web_search_20250305", "name": "web_search"},
{"type": "custom_builtin_20250401", "name": "special_builtin"},
{"name": "Read"}
]
}`
	builtins := AugmentClaudeBuiltinToolRegistry([]byte(requestBody), nil)

	// Cases are evaluated in order, so the first violated expectation is the
	// one reported.
	switch {
	case !builtins["web_search"]:
		t.Fatal("expected default typed builtin web_search in registry")
	case !builtins["special_builtin"]:
		t.Fatal("expected typed builtin from body to be added to registry")
	case builtins["Read"]:
		t.Fatal("expected untyped custom tool to stay out of builtin registry")
	}
}

View File

@@ -0,0 +1,106 @@
package test
import (
"encoding/json"
"os"
"path/filepath"
"testing"
)
// jsonObject is an alias for a decoded JSON object keyed by field name.
type jsonObject = map[string]any
// loadClaudeCodeSentinelFixture reads testdata/claude_code_sentinels/<name> and
// decodes it as a JSON object. The test is failed immediately if the file
// cannot be read or does not unmarshal into a jsonObject.
func loadClaudeCodeSentinelFixture(t *testing.T, name string) jsonObject {
	t.Helper()

	raw := mustReadFile(t, filepath.Join("testdata", "claude_code_sentinels", name))

	var decoded jsonObject
	err := json.Unmarshal(raw, &decoded)
	if err != nil {
		t.Fatalf("unmarshal %s: %v", name, err)
	}
	return decoded
}
// mustReadFile returns the contents of the file at path, failing the test
// immediately when the file cannot be read.
func mustReadFile(t *testing.T, path string) []byte {
	t.Helper()

	contents, readErr := os.ReadFile(path)
	if readErr == nil {
		return contents
	}
	t.Fatalf("read %s: %v", path, readErr)
	return nil // not reached: Fatalf stops the calling test
}
// requireStringField returns obj[key] when it holds a non-empty string. Any
// other situation (key absent, value of another type, or an empty string)
// fails the test immediately.
func requireStringField(t *testing.T, obj jsonObject, key string) string {
	t.Helper()

	if text, isString := obj[key].(string); isString && text != "" {
		return text
	}
	t.Fatalf("field %q missing or empty: %#v", key, obj[key])
	return "" // not reached: Fatalf stops the calling test
}
// TestClaudeCodeSentinel_ToolProgressShape pins the shape of the
// tool_progress fixture: the "type" discriminator, the non-empty string
// identifiers, and a numeric elapsed_time_seconds.
func TestClaudeCodeSentinel_ToolProgressShape(t *testing.T) {
	event := loadClaudeCodeSentinelFixture(t, "tool_progress.json")

	if kind := requireStringField(t, event, "type"); kind != "tool_progress" {
		t.Fatalf("type = %q, want tool_progress", kind)
	}
	for _, key := range []string{"tool_use_id", "tool_name", "session_id"} {
		requireStringField(t, event, key)
	}

	// encoding/json decodes every JSON number into float64 when the target is
	// an untyped map value.
	elapsed := event["elapsed_time_seconds"]
	if _, isNumber := elapsed.(float64); !isNumber {
		t.Fatalf("elapsed_time_seconds missing or non-number: %#v", elapsed)
	}
}
// TestClaudeCodeSentinel_SessionStateShape pins the shape of the
// session_state_changed fixture: type "system", the matching subtype, a state
// drawn from idle/running/requires_action, and a non-empty session_id.
func TestClaudeCodeSentinel_SessionStateShape(t *testing.T) {
	event := loadClaudeCodeSentinelFixture(t, "session_state_changed.json")

	kind := requireStringField(t, event, "type")
	if kind != "system" {
		t.Fatalf("type = %q, want system", kind)
	}
	subtype := requireStringField(t, event, "subtype")
	if subtype != "session_state_changed" {
		t.Fatalf("subtype = %q, want session_state_changed", subtype)
	}

	current := requireStringField(t, event, "state")
	if current != "idle" && current != "running" && current != "requires_action" {
		t.Fatalf("unexpected session state %q", current)
	}

	requireStringField(t, event, "session_id")
}
// TestClaudeCodeSentinel_ToolUseSummaryShape pins the shape of the
// tool_use_summary fixture: the "type" discriminator, a non-empty summary, and
// a non-empty preceding_tool_use_ids list made only of non-empty strings.
func TestClaudeCodeSentinel_ToolUseSummaryShape(t *testing.T) {
	event := loadClaudeCodeSentinelFixture(t, "tool_use_summary.json")

	if kind := requireStringField(t, event, "type"); kind != "tool_use_summary" {
		t.Fatalf("type = %q, want tool_use_summary", kind)
	}
	requireStringField(t, event, "summary")

	ids, isList := event["preceding_tool_use_ids"].([]any)
	if !isList || len(ids) == 0 {
		t.Fatalf("preceding_tool_use_ids missing or empty: %#v", event["preceding_tool_use_ids"])
	}
	for idx := range ids {
		if text, isString := ids[idx].(string); isString && text != "" {
			continue
		}
		t.Fatalf("preceding_tool_use_ids[%d] invalid: %#v", idx, ids[idx])
	}
}
// TestClaudeCodeSentinel_ControlRequestCanUseToolShape pins the shape of the
// control_request fixture: the outer type and request_id, plus a nested
// "request" object with subtype can_use_tool, tool identifiers, and a
// non-empty "input" object.
func TestClaudeCodeSentinel_ControlRequestCanUseToolShape(t *testing.T) {
	envelope := loadClaudeCodeSentinelFixture(t, "control_request_can_use_tool.json")

	if kind := requireStringField(t, envelope, "type"); kind != "control_request" {
		t.Fatalf("type = %q, want control_request", kind)
	}
	requireStringField(t, envelope, "request_id")

	// jsonObject is an alias of map[string]any, so this assertion matches the
	// nested object produced by encoding/json.
	inner, isObject := envelope["request"].(jsonObject)
	if !isObject {
		t.Fatalf("request missing or invalid: %#v", envelope["request"])
	}

	if subtype := requireStringField(t, inner, "subtype"); subtype != "can_use_tool" {
		t.Fatalf("request.subtype = %q, want can_use_tool", subtype)
	}
	for _, key := range []string{"tool_name", "tool_use_id"} {
		requireStringField(t, inner, key)
	}

	args, isArgsObject := inner["input"].(map[string]any)
	if !isArgsObject || len(args) == 0 {
		t.Fatalf("request.input missing or empty: %#v", inner["input"])
	}
}

View File

@@ -0,0 +1,11 @@
{
"type": "control_request",
"request_id": "req_123",
"request": {
"subtype": "can_use_tool",
"tool_name": "Bash",
"input": {"command": "npm test"},
"tool_use_id": "toolu_123",
"description": "Running npm test"
}
}

View File

@@ -0,0 +1,7 @@
{
"type": "system",
"subtype": "session_state_changed",
"state": "requires_action",
"uuid": "22222222-2222-4222-8222-222222222222",
"session_id": "sess_123"
}

View File

@@ -0,0 +1,10 @@
{
"type": "tool_progress",
"tool_use_id": "toolu_123",
"tool_name": "Bash",
"parent_tool_use_id": null,
"elapsed_time_seconds": 2.5,
"task_id": "task_123",
"uuid": "11111111-1111-4111-8111-111111111111",
"session_id": "sess_123"
}

View File

@@ -0,0 +1,7 @@
{
"type": "tool_use_summary",
"summary": "Searched in auth/",
"preceding_tool_use_ids": ["toolu_1", "toolu_2"],
"uuid": "33333333-3333-4333-8333-333333333333",
"session_id": "sess_123"
}