diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 6f218f96..fced14d8 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -951,12 +951,9 @@ func applyClaudeToolPrefix(body []byte, prefix string) []byte { return body } - // Collect built-in tool names (those with a non-empty "type" field) so we can - // skip them consistently in both tools and message history. - builtinTools := map[string]bool{} - for _, name := range []string{"web_search", "code_execution", "text_editor", "computer"} { - builtinTools[name] = true - } + // Collect built-in tool names from the authoritative fallback seed list and + // augment it with any typed built-ins present in the current request body. + builtinTools := helps.AugmentClaudeBuiltinToolRegistry(body, nil) if tools := gjson.GetBytes(body, "tools"); tools.Exists() && tools.IsArray() { tools.ForEach(func(index, tool gjson.Result) bool { diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go index c6220fe9..2cf969bb 100644 --- a/internal/runtime/executor/claude_executor_test.go +++ b/internal/runtime/executor/claude_executor_test.go @@ -739,6 +739,35 @@ func TestApplyClaudeToolPrefix_ToolChoiceBuiltin(t *testing.T) { } } +func TestApplyClaudeToolPrefix_KnownFallbackBuiltinsRemainUnprefixed(t *testing.T) { + for _, builtin := range []string{"web_search", "code_execution", "text_editor", "computer"} { + t.Run(builtin, func(t *testing.T) { + input := []byte(fmt.Sprintf(`{ + "tools":[{"name":"Read"}], + "tool_choice":{"type":"tool","name":%q}, + "messages":[{"role":"assistant","content":[{"type":"tool_use","name":%q,"id":"toolu_1","input":{}},{"type":"tool_reference","tool_name":%q},{"type":"tool_result","tool_use_id":"toolu_1","content":[{"type":"tool_reference","tool_name":%q}]}]}] + }`, builtin, builtin, builtin, builtin)) + out := applyClaudeToolPrefix(input, "proxy_") + + if got := gjson.GetBytes(out, "tool_choice.name").String(); got != builtin { + t.Fatalf("tool_choice.name = %q, want %q", got, builtin) + } + if got := gjson.GetBytes(out, "messages.0.content.0.name").String(); got != builtin { + t.Fatalf("messages.0.content.0.name = %q, want %q", got, builtin) + } + if got := gjson.GetBytes(out, "messages.0.content.1.tool_name").String(); got != builtin { + t.Fatalf("messages.0.content.1.tool_name = %q, want %q", got, builtin) + } + if got := gjson.GetBytes(out, "messages.0.content.2.content.0.tool_name").String(); got != builtin { + t.Fatalf("messages.0.content.2.content.0.tool_name = %q, want %q", got, builtin) + } + if got := gjson.GetBytes(out, "tools.0.name").String(); got != "proxy_Read" { + t.Fatalf("tools.0.name = %q, want %q", got, "proxy_Read") + } + }) + } +} + func TestStripClaudeToolPrefixFromResponse(t *testing.T) { input := []byte(`{"content":[{"type":"tool_use","name":"proxy_alpha","id":"t1","input":{}},{"type":"tool_use","name":"bravo","id":"t2","input":{}}]}`) out := stripClaudeToolPrefixFromResponse(input, "proxy_") diff --git a/internal/runtime/executor/helps/claude_builtin_tools.go b/internal/runtime/executor/helps/claude_builtin_tools.go new file mode 100644 index 00000000..5ee2b08d --- /dev/null +++ b/internal/runtime/executor/helps/claude_builtin_tools.go @@ -0,0 +1,38 @@ +package helps + +import "github.com/tidwall/gjson" + +var defaultClaudeBuiltinToolNames = []string{ + "web_search", + "code_execution", + "text_editor", + "computer", +} + +func newClaudeBuiltinToolRegistry() map[string]bool { + registry := make(map[string]bool, len(defaultClaudeBuiltinToolNames)) + for _, name := range defaultClaudeBuiltinToolNames { + registry[name] = true + } + return registry +} + +func AugmentClaudeBuiltinToolRegistry(body []byte, registry map[string]bool) map[string]bool { + if registry == nil { + registry = newClaudeBuiltinToolRegistry() + } + tools := gjson.GetBytes(body, "tools") + if !tools.Exists() || !tools.IsArray() { + return registry + } + tools.ForEach(func(_, tool gjson.Result) bool { + if tool.Get("type").String() == "" { + return true + } + if name := tool.Get("name").String(); name != "" { + registry[name] = true + } + return true + }) + return registry +} diff --git a/internal/runtime/executor/helps/claude_builtin_tools_test.go b/internal/runtime/executor/helps/claude_builtin_tools_test.go new file mode 100644 index 00000000..d7badd19 --- /dev/null +++ b/internal/runtime/executor/helps/claude_builtin_tools_test.go @@ -0,0 +1,32 @@ +package helps + +import "testing" + +func TestClaudeBuiltinToolRegistry_DefaultSeedFallback(t *testing.T) { + registry := AugmentClaudeBuiltinToolRegistry(nil, nil) + for _, name := range defaultClaudeBuiltinToolNames { + if !registry[name] { + t.Fatalf("default builtin %q missing from fallback registry", name) + } + } +} + +func TestClaudeBuiltinToolRegistry_AugmentsTypedBuiltinsFromBody(t *testing.T) { + registry := AugmentClaudeBuiltinToolRegistry([]byte(`{ + "tools": [ + {"type": "web_search_20250305", "name": "web_search"}, + {"type": "custom_builtin_20250401", "name": "special_builtin"}, + {"name": "Read"} + ] + }`), nil) + + if !registry["web_search"] { + t.Fatal("expected default typed builtin web_search in registry") + } + if !registry["special_builtin"] { + t.Fatal("expected typed builtin from body to be added to registry") + } + if registry["Read"] { + t.Fatal("expected untyped custom tool to stay out of builtin registry") + } +} diff --git a/test/claude_code_compatibility_sentinel_test.go b/test/claude_code_compatibility_sentinel_test.go new file mode 100644 index 00000000..793b3c6a --- /dev/null +++ b/test/claude_code_compatibility_sentinel_test.go @@ -0,0 +1,106 @@ +package test + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" +) + +type jsonObject = map[string]any + +func loadClaudeCodeSentinelFixture(t *testing.T, name string) jsonObject { + t.Helper() + path := filepath.Join("testdata", "claude_code_sentinels", name) + data := mustReadFile(t, path) + var payload jsonObject + if err := json.Unmarshal(data, &payload); err != nil { + t.Fatalf("unmarshal %s: %v", name, err) + } + return payload +} + +func mustReadFile(t *testing.T, path string) []byte { + t.Helper() + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + return data +} + +func requireStringField(t *testing.T, obj jsonObject, key string) string { + t.Helper() + value, ok := obj[key].(string) + if !ok || value == "" { + t.Fatalf("field %q missing or empty: %#v", key, obj[key]) + } + return value +} + +func TestClaudeCodeSentinel_ToolProgressShape(t *testing.T) { + payload := loadClaudeCodeSentinelFixture(t, "tool_progress.json") + if got := requireStringField(t, payload, "type"); got != "tool_progress" { + t.Fatalf("type = %q, want tool_progress", got) + } + requireStringField(t, payload, "tool_use_id") + requireStringField(t, payload, "tool_name") + requireStringField(t, payload, "session_id") + if _, ok := payload["elapsed_time_seconds"].(float64); !ok { + t.Fatalf("elapsed_time_seconds missing or non-number: %#v", payload["elapsed_time_seconds"]) + } +} + +func TestClaudeCodeSentinel_SessionStateShape(t *testing.T) { + payload := loadClaudeCodeSentinelFixture(t, "session_state_changed.json") + if got := requireStringField(t, payload, "type"); got != "system" { + t.Fatalf("type = %q, want system", got) + } + if got := requireStringField(t, payload, "subtype"); got != "session_state_changed" { + t.Fatalf("subtype = %q, want session_state_changed", got) + } + state := requireStringField(t, payload, "state") + switch state { + case "idle", "running", "requires_action": + default: + t.Fatalf("unexpected session state %q", state) + } + requireStringField(t, payload, "session_id") +} + +func TestClaudeCodeSentinel_ToolUseSummaryShape(t *testing.T) { + payload := loadClaudeCodeSentinelFixture(t, "tool_use_summary.json") + if got := requireStringField(t, payload, "type"); got != "tool_use_summary" { + t.Fatalf("type = %q, want tool_use_summary", got) + } + requireStringField(t, payload, "summary") + rawIDs, ok := payload["preceding_tool_use_ids"].([]any) + if !ok || len(rawIDs) == 0 { + t.Fatalf("preceding_tool_use_ids missing or empty: %#v", payload["preceding_tool_use_ids"]) + } + for i, raw := range rawIDs { + if id, ok := raw.(string); !ok || id == "" { + t.Fatalf("preceding_tool_use_ids[%d] invalid: %#v", i, raw) + } + } +} + +func TestClaudeCodeSentinel_ControlRequestCanUseToolShape(t *testing.T) { + payload := loadClaudeCodeSentinelFixture(t, "control_request_can_use_tool.json") + if got := requireStringField(t, payload, "type"); got != "control_request" { + t.Fatalf("type = %q, want control_request", got) + } + requireStringField(t, payload, "request_id") + request, ok := payload["request"].(map[string]any) + if !ok { + t.Fatalf("request missing or invalid: %#v", payload["request"]) + } + if got := requireStringField(t, request, "subtype"); got != "can_use_tool" { + t.Fatalf("request.subtype = %q, want can_use_tool", got) + } + requireStringField(t, request, "tool_name") + requireStringField(t, request, "tool_use_id") + if input, ok := request["input"].(map[string]any); !ok || len(input) == 0 { + t.Fatalf("request.input missing or empty: %#v", request["input"]) + } +} diff --git a/test/testdata/claude_code_sentinels/control_request_can_use_tool.json b/test/testdata/claude_code_sentinels/control_request_can_use_tool.json new file mode 100644 index 00000000..cafdb00a --- /dev/null +++ b/test/testdata/claude_code_sentinels/control_request_can_use_tool.json @@ -0,0 +1,11 @@ +{ + "type": "control_request", + "request_id": "req_123", + "request": { + "subtype": "can_use_tool", + "tool_name": "Bash", + "input": {"command": "npm test"}, + "tool_use_id": "toolu_123", + "description": "Running npm test" + } +} diff --git a/test/testdata/claude_code_sentinels/session_state_changed.json b/test/testdata/claude_code_sentinels/session_state_changed.json new file mode 100644 index 00000000..db411ace --- /dev/null +++ b/test/testdata/claude_code_sentinels/session_state_changed.json @@ -0,0 +1,7 @@ +{ + "type": "system", + "subtype": "session_state_changed", + "state": "requires_action", + "uuid": "22222222-2222-4222-8222-222222222222", + "session_id": "sess_123" +} diff --git a/test/testdata/claude_code_sentinels/tool_progress.json b/test/testdata/claude_code_sentinels/tool_progress.json new file mode 100644 index 00000000..45a3a22e --- /dev/null +++ b/test/testdata/claude_code_sentinels/tool_progress.json @@ -0,0 +1,10 @@ +{ + "type": "tool_progress", + "tool_use_id": "toolu_123", + "tool_name": "Bash", + "parent_tool_use_id": null, + "elapsed_time_seconds": 2.5, + "task_id": "task_123", + "uuid": "11111111-1111-4111-8111-111111111111", + "session_id": "sess_123" +} diff --git a/test/testdata/claude_code_sentinels/tool_use_summary.json b/test/testdata/claude_code_sentinels/tool_use_summary.json new file mode 100644 index 00000000..da3c4c3e --- /dev/null +++ b/test/testdata/claude_code_sentinels/tool_use_summary.json @@ -0,0 +1,7 @@ +{ + "type": "tool_use_summary", + "summary": "Searched in auth/", + "preceding_tool_use_ids": ["toolu_1", "toolu_2"], + "uuid": "33333333-3333-4333-8333-333333333333", + "session_id": "sess_123" +}