test: cover codex app-server subagents

This commit is contained in:
Peter Steinberger
2026-04-26 03:45:00 +01:00
parent c149de7750
commit 0ddbae171d
14 changed files with 448 additions and 83 deletions

View File

@@ -85,21 +85,59 @@ Docs: https://docs.openclaw.ai
and show daemon state separately when available, so `gateway.tailscale.mode:
"off"` no longer reads like the Tailscale daemon is stopped. Fixes #71790.
Thanks @pesvobodak.
- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables itself for the current Gateway process after repeated failed restarts while the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux, @FiredMosquito831, and @spikefcz.
- Gateway/Fly.io: seed Control UI allowed origins from the actual runtime bind and port so CLI-driven non-loopback starts do not crash before config exists. Fixes #71823.
- Gateway/proxy: bootstrap env proxy dispatching from direct Gateway startup so provider and plugin network requests honor `HTTPS_PROXY`/`HTTP_PROXY` before the first embedded agent attempt runs. (#71833) Thanks @mjamiv.
- Models/LM Studio: preserve `@iq*` quant suffixes in model refs and provider matching so `/model lmstudio/...@iq3_xxs` keeps the exact LM Studio variant. Fixes #71474. (#71486) Thanks @Bartok9, @XinwuC, and @Sanjays2402.
- Matrix/cron: preserve the live Matrix delivery target when creating implicit announce reminder jobs so mixed-case room IDs are not reconstructed from lowercased session keys. Fixes #71798.
- Feishu: accept Schema 2.0 card action callbacks that report `context.open_chat_id` instead of legacy `context.chat_id`, so button callbacks no longer drop as malformed. Fixes #71670. Thanks @eddy1068.
- Feishu: keep synthetic card-action and bot-menu ids out of platform reply targets, using the real card callback message id when Feishu provides one and plain-sending otherwise. Fixes #71673. Thanks @eddy1068.
- Plugins/QQ Bot: prefer an installed QQ Bot plugin that declares it replaces the bundled `qqbot` channel, preventing duplicate `qqbot_channel_api` and `qqbot_remind` tool registration noise. Fixes #63102.
- Browser automation: keep stable tab ids and labels attached when Chromium replaces the raw target after form submissions or other action-triggered navigations, and return the replacement `targetId` from `/act` when the match is provable. Fixes #46137.
- QQ Bot: make `qqbot_remind` schedule, list, and remove Gateway cron jobs directly for owner-authorized senders instead of returning `cronParams` and relying on a follow-up generic `cron` tool call. Fixes #70865. (#70937) Thanks @GaosCode.
- Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled `acpx`. Thanks @vincentkoc.
- Media delivery: avoid sending generated image attachments twice when the assistant reply already includes explicit `MEDIA:` lines for the same turn, and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash.
- Codex harness: ignore retryable app-server error notifications after Codex recovers, and preserve the real nested error message for terminal app-server failures instead of replacing it with a generic failure. Thanks @pashpashpash.
- Agents/subagents: keep queued subagent announces session-only when the requester has no external channel target, avoiding ambiguous multi-channel delivery failures. Fixes #59201. Thanks @larrylhollan.
- Image understanding: preserve configured provider-prefixed vision model metadata when callers request the model without the provider prefix, so custom image models keep their `input: ["text", "image"]` capability. Fixes #33185. Thanks @Kobe9312 and @vincentkoc.
- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when
the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables
itself for the current Gateway process after repeated failed restarts while
the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux,
@FiredMosquito831, and @spikefcz.
- Gateway/Fly.io: seed Control UI allowed origins from the actual runtime
bind and port so CLI-driven non-loopback starts do not crash before config
exists. Fixes #71823.
- Gateway/proxy: bootstrap env proxy dispatching from direct Gateway startup
so provider and plugin network requests honor `HTTPS_PROXY`/`HTTP_PROXY`
before the first embedded agent attempt runs. (#71833) Thanks @mjamiv.
- Models/LM Studio: preserve `@iq*` quant suffixes in model refs and provider
matching so `/model lmstudio/...@iq3_xxs` keeps the exact LM Studio variant.
Fixes #71474. (#71486) Thanks @Bartok9, @XinwuC, and @Sanjays2402.
- Matrix/cron: preserve the live Matrix delivery target when creating implicit
announce reminder jobs so mixed-case room IDs are not reconstructed from
lowercased session keys. Fixes #71798.
- Feishu: accept Schema 2.0 card action callbacks that report
`context.open_chat_id` instead of legacy `context.chat_id`, so button
callbacks no longer drop as malformed. Fixes #71670. Thanks @eddy1068.
- Feishu: keep synthetic card-action and bot-menu ids out of platform reply
targets, using the real card callback message id when Feishu provides one and
plain-sending otherwise. Fixes #71673. Thanks @eddy1068.
- Plugins/QQ Bot: prefer an installed QQ Bot plugin that declares it replaces
the bundled `qqbot` channel, preventing duplicate `qqbot_channel_api` and
`qqbot_remind` tool registration noise. Fixes #63102.
- Browser automation: keep stable tab ids and labels attached when Chromium
replaces the raw target after form submissions or other action-triggered
navigations, and return the replacement `targetId` from `/act` when the match
is provable. Fixes #46137.
- QQ Bot: make `qqbot_remind` schedule, list, and remove Gateway cron jobs
directly for owner-authorized senders instead of returning `cronParams` and
relying on a follow-up generic `cron` tool call. Fixes #70865. (#70937)
Thanks @GaosCode.
- Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is
loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled
`acpx`. Thanks @vincentkoc.
- Media delivery: avoid sending generated image attachments twice when the
assistant reply already includes explicit `MEDIA:` lines for the same turn,
and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash.
- Codex harness: ignore retryable app-server error notifications after Codex
recovers, and preserve the real nested error message for terminal app-server
failures instead of replacing it with a generic failure. Thanks @pashpashpash.
- Agents/Codex: prepare native Codex sub-agent session metadata without a
nested Gateway session patch and add a focused Docker smoke for the app-server
sub-agent path. Thanks @vincentkoc.
- Agents/subagents: keep queued subagent announces session-only when the
requester has no external channel target, avoiding ambiguous multi-channel
delivery failures. Fixes #59201. Thanks @larrylhollan.
- Image understanding: preserve configured provider-prefixed vision model
metadata when callers request the model without the provider prefix, so custom
image models keep their `input: ["text", "image"]` capability. Fixes #33185.
Thanks @Kobe9312 and @vincentkoc.
- Plugins/install: restore the previous plugin index records if a concurrent config write conflict interrupts install, update, or uninstall metadata commits. Thanks @shakkernerd.
- Plugins/update: restore previous plugin index records if core update or channel setup hits a concurrent config write conflict after plugin metadata changes. Thanks @shakkernerd.
- Plugins/onboarding: defer channel/provider plugin install records until the owning config write commits, keeping setup failures from advancing the plugin index ahead of `openclaw.json`. Thanks @shakkernerd.

View File

@@ -55,6 +55,15 @@ When debugging real providers/models (requires real creds):
Slack DM with `/codex bind`, exercises `/codex fast` and
`/codex permissions`, then verifies a plain reply and an image attachment
route through the native plugin binding instead of ACP.
- Codex app-server harness smoke: `pnpm test:docker:live-codex-harness`
- Runs gateway agent turns through the plugin-owned Codex app-server harness,
verifies `/codex status` and `/codex models`, and by default exercises image,
cron MCP, sub-agent, and Guardian probes. Disable the sub-agent probe with
`OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE=0` when isolating other Codex
app-server failures. For a focused sub-agent check, disable the other probes:
`OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE=1 pnpm test:docker:live-codex-harness`.
With only the sub-agent probe enabled, the run exits right after that probe
unless `OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY=0` is set.
- Crestodian rescue command smoke: `pnpm test:live:crestodian-rescue-channel`
- Opt-in belt-and-suspenders check for the message-channel rescue command
surface. It exercises `/crestodian status`, queues a persistent model

View File

@@ -203,6 +203,8 @@ echo "==> Run Codex harness live test in Docker"
echo "==> Model: ${OPENCLAW_LIVE_CODEX_HARNESS_MODEL:-codex/gpt-5.5}"
echo "==> Image probe: ${OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE:-1}"
echo "==> MCP probe: ${OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE:-1}"
echo "==> Subagent probe: ${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE:-1}"
echo "==> Subagent-only fast path: ${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY:-auto}"
echo "==> Guardian probe: ${OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE:-1}"
echo "==> Auth mode: $CODEX_HARNESS_AUTH_MODE"
echo "==> Profile file: $PROFILE_STATUS"
@@ -230,6 +232,8 @@ DOCKER_RUN_ARGS=(docker run --rm -t \
-e OPENCLAW_LIVE_CODEX_HARNESS_MODEL="${OPENCLAW_LIVE_CODEX_HARNESS_MODEL:-codex/gpt-5.5}" \
-e OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS="${OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS:-1}" \
-e OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS="${OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS:-}" \
-e OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY="${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY:-}" \
-e OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE="${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE:-1}" \
-e OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG="${OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG:-1}" \
-e OPENCLAW_LIVE_CODEX_BIND="${OPENCLAW_LIVE_CODEX_BIND:-}" \
-e OPENCLAW_LIVE_CODEX_BIND_MODEL="${OPENCLAW_LIVE_CODEX_BIND_MODEL:-}" \

View File

@@ -269,6 +269,32 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
});
// Checks that the gateway request which starts the child agent gets a request
// timeout derived from the requested run timeout.
it("gives native child agent startup enough gateway request time", async () => {
const ctx = setupSessionsSpawnGatewayMock({
includeChatHistory: true,
agentWaitResult: { status: "ok", startedAt: 1000, endedAt: 2000 },
});
const tool = await getSessionsSpawnTool({
agentSessionKey: "main",
agentChannel: "whatsapp",
});
const result = await tool.execute("call-start-timeout", {
task: "do thing",
runTimeoutSeconds: 120,
});
expect(result.details).toMatchObject({
status: "accepted",
runId: expect.any(String),
});
// The child start request is the recorded `agent` call on the "subagent" lane.
const childAgentCall = ctx.calls.find((call) => {
const params = call.params as { lane?: string } | undefined;
return call.method === "agent" && params?.lane === "subagent";
});
// 120s run timeout -> 120_000ms plus 5_000ms of slack.
expect(childAgentCall?.timeoutMs).toBe(125_000);
});
it("sessions_spawn retires bundle MCP runtime when run-mode cleanup completes", async () => {
let resumeAnnounceFlow: ((value: boolean) => void) | undefined;
let announceFlowStarted: (() => void) | undefined;

View File

@@ -12,7 +12,7 @@ type CreateSessionsSpawnTool =
type SubagentRegistryTesting = (typeof import("./subagent-registry.js"))["__testing"];
type SubagentSpawnTesting = (typeof import("./subagent-spawn.js"))["__testing"];
export type CreateOpenClawToolsOpts = Parameters<CreateSessionsSpawnTool>[0];
export type GatewayRequest = { method?: string; params?: unknown };
export type GatewayRequest = { method?: string; params?: unknown; timeoutMs?: number };
export type AgentWaitCall = { runId?: string; timeoutMs?: number };
type SessionsSpawnGatewayMockOptions = {
includeSessionsList?: boolean;

View File

@@ -9,6 +9,7 @@ type GatewayRequest = { method?: string; params?: Record<string, unknown> };
const hoisted = vi.hoisted(() => ({
callGatewayMock: vi.fn(),
configOverride: {} as Record<string, unknown>,
updateSessionStoreMock: vi.fn(),
}));
const hookRunnerMocks = vi.hoisted(() => ({
@@ -139,6 +140,7 @@ beforeAll(async () => {
({ resetSubagentRegistryForTests, spawnSubagentDirect } = await loadSubagentSpawnModuleForTest({
callGatewayMock: hoisted.callGatewayMock,
loadConfig: () => hoisted.configOverride,
updateSessionStoreMock: hoisted.updateSessionStoreMock,
hookRunner: {
hasHooks: (hookName: string) =>
hookName === "subagent_spawning" ||
@@ -157,6 +159,7 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
beforeEach(() => {
resetSubagentRegistryForTests();
hoisted.callGatewayMock.mockReset();
hoisted.updateSessionStoreMock.mockReset();
hookRunnerMocks.hasSubagentEndedHook = true;
hookRunnerMocks.runSubagentSpawning.mockClear();
hookRunnerMocks.runSubagentSpawned.mockClear();
@@ -167,6 +170,16 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
scope: "per-sender",
},
});
const store: Record<string, Record<string, unknown>> = {};
hoisted.updateSessionStoreMock.mockImplementation(
async (_storePath: unknown, mutator: unknown) => {
if (typeof mutator !== "function") {
throw new Error("missing session store mutator");
}
await mutator(store);
return store;
},
);
hoisted.callGatewayMock.mockImplementation(async (opts: unknown) => {
const request = opts as { method?: string };
if (request.method === "sessions.patch") {
@@ -398,11 +411,21 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
});
it("cleans up the provisional session when lineage patching fails after thread binding", async () => {
const store: Record<string, Record<string, unknown>> = {};
hoisted.updateSessionStoreMock.mockImplementation(
async (_storePath: unknown, mutator: unknown) => {
if (typeof mutator !== "function") {
throw new Error("missing session store mutator");
}
await mutator(store);
if (Object.values(store).some((entry) => typeof entry.spawnedBy === "string")) {
throw new Error("lineage patch failed");
}
return store;
},
);
hoisted.callGatewayMock.mockImplementation(async (opts: unknown) => {
const request = opts as { method?: string; params?: Record<string, unknown> };
if (request.method === "sessions.patch" && typeof request.params?.spawnedBy === "string") {
throw new Error("lineage patch failed");
}
if (request.method === "sessions.delete") {
return { ok: true };
}
@@ -420,10 +443,8 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
agentThreadId: "456",
});
expect(result).toMatchObject({
status: "error",
error: "lineage patch failed",
});
expect(result.status).toBe("error");
expect(result.error).toContain("lineage patch failed");
expect(hookRunnerMocks.runSubagentSpawned).not.toHaveBeenCalled();
expect(hookRunnerMocks.runSubagentEnded).not.toHaveBeenCalled();
const methods = getGatewayMethods();

View File

@@ -9,6 +9,7 @@ import {
} from "./subagent-spawn.test-helpers.js";
const callGatewayMock = vi.fn();
const updateSessionStoreMock = vi.fn();
let configOverride: Record<string, unknown> = {
...createSubagentSpawnTestConfig(),
@@ -20,6 +21,7 @@ beforeAll(async () => {
subagentSpawnModule = await loadSubagentSpawnModuleForTest({
callGatewayMock,
loadConfig: () => configOverride,
updateSessionStoreMock,
workspaceDir: workspaceDirOverride || os.tmpdir(),
});
});
@@ -92,6 +94,15 @@ describe("spawnSubagentDirect filename validation", () => {
configOverride = createSubagentSpawnTestConfig(workspaceDirOverride);
subagentSpawnModule.resetSubagentRegistryForTests();
callGatewayMock.mockClear();
updateSessionStoreMock.mockReset();
const store: Record<string, Record<string, unknown>> = {};
updateSessionStoreMock.mockImplementation(async (_storePath: unknown, mutator: unknown) => {
if (typeof mutator !== "function") {
throw new Error("missing session store mutator");
}
await mutator(store);
return store;
});
setupAcceptedSubagentGatewayMock(callGatewayMock);
});
@@ -170,12 +181,20 @@ describe("spawnSubagentDirect filename validation", () => {
it("removes materialized attachments when lineage patching fails", async () => {
const calls: Array<{ method?: string; params?: Record<string, unknown> }> = [];
const store: Record<string, Record<string, unknown>> = {};
updateSessionStoreMock.mockImplementation(async (_storePath: unknown, mutator: unknown) => {
if (typeof mutator !== "function") {
throw new Error("missing session store mutator");
}
await mutator(store);
if (Object.values(store).some((entry) => typeof entry.spawnedBy === "string")) {
throw new Error("lineage patch failed");
}
return store;
});
callGatewayMock.mockImplementation(async (opts: unknown) => {
const request = opts as { method?: string; params?: Record<string, unknown> };
calls.push(request);
if (request.method === "sessions.patch" && typeof request.params?.spawnedBy === "string") {
throw new Error("lineage patch failed");
}
if (request.method === "sessions.delete") {
return { ok: true };
}
@@ -191,10 +210,8 @@ describe("spawnSubagentDirect filename validation", () => {
ctx,
);
expect(result).toMatchObject({
status: "error",
error: "lineage patch failed",
});
expect(result.status).toBe("error");
expect(result.error).toContain("lineage patch failed");
const attachmentsRoot = path.join(workspaceDirOverride, ".openclaw", "attachments");
const retainedDirs = fs.existsSync(attachmentsRoot)
? fs.readdirSync(attachmentsRoot).filter((entry) => !entry.startsWith("."))

View File

@@ -1,6 +1,7 @@
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import {
createSubagentSpawnTestConfig,
installSessionStoreCaptureMock,
loadSubagentSpawnModuleForTest,
setupAcceptedSubagentGatewayMock,
} from "./subagent-spawn.test-helpers.js";
@@ -10,10 +11,12 @@ const hoisted = vi.hoisted(() => ({
callGatewayMock: vi.fn(),
configOverride: {} as Record<string, unknown>,
depthBySession: new Map<string, number>(),
updateSessionStoreMock: vi.fn(),
registerSubagentRunMock: vi.fn(),
}));
let spawnSubagentDirect: typeof import("./subagent-spawn.js").spawnSubagentDirect;
let persistedStore: Record<string, Record<string, unknown>> | undefined;
function createDepthLimitConfig(subagents?: Record<string, unknown>) {
return createSubagentSpawnTestConfig("/tmp/workspace-main", {
@@ -48,6 +51,7 @@ describe("subagent spawn depth + child limits", () => {
callGatewayMock: hoisted.callGatewayMock,
loadConfig: () => hoisted.configOverride,
registerSubagentRunMock: hoisted.registerSubagentRunMock,
updateSessionStoreMock: hoisted.updateSessionStoreMock,
getSubagentDepthFromSessionStore: (sessionKey) => hoisted.depthBySession.get(sessionKey) ?? 0,
countActiveRunsForSession: (sessionKey) =>
hoisted.activeChildrenBySession.get(sessionKey) ?? 0,
@@ -60,6 +64,13 @@ describe("subagent spawn depth + child limits", () => {
hoisted.depthBySession.clear();
hoisted.callGatewayMock.mockClear();
hoisted.registerSubagentRunMock.mockClear();
hoisted.updateSessionStoreMock.mockReset();
persistedStore = undefined;
installSessionStoreCaptureMock(hoisted.updateSessionStoreMock, {
onStore: (store) => {
persistedStore = store;
},
});
hoisted.configOverride = createDepthLimitConfig();
setupAcceptedSubagentGatewayMock(hoisted.callGatewayMock);
});
@@ -87,23 +98,14 @@ describe("subagent spawn depth + child limits", () => {
runId: "run-1",
});
const calls = hoisted.callGatewayMock.mock.calls.map(
(call) => call[0] as { method?: string; params?: Record<string, unknown> },
);
const spawnedByPatch = calls.find(
(entry) =>
entry.method === "sessions.patch" &&
entry.params?.spawnedBy === "agent:main:subagent:parent",
);
expect(spawnedByPatch?.params?.key).toMatch(/^agent:main:subagent:/);
expect(typeof spawnedByPatch?.params?.spawnedWorkspaceDir).toBe("string");
const spawnDepthPatch = calls.find(
(entry) => entry.method === "sessions.patch" && entry.params?.spawnDepth === 2,
);
expect(spawnDepthPatch?.params?.key).toMatch(/^agent:main:subagent:/);
expect(spawnDepthPatch?.params?.subagentRole).toBe("leaf");
expect(spawnDepthPatch?.params?.subagentControlScope).toBe("none");
const childSession = persistedStore?.[result.childSessionKey as string];
expect(childSession).toMatchObject({
spawnedBy: "agent:main:subagent:parent",
spawnDepth: 2,
subagentRole: "leaf",
subagentControlScope: "none",
});
expect(typeof childSession?.spawnedWorkspaceDir).toBe("string");
});
it("rejects callers when stored spawn depth is already at the configured max", async () => {
@@ -151,19 +153,17 @@ describe("subagent spawn depth + child limits", () => {
});
});
it("fails spawn when sessions.patch rejects the model", async () => {
it("fails spawn when the initial child session patch rejects the model", async () => {
hoisted.configOverride = createDepthLimitConfig({ maxSpawnDepth: 2 });
hoisted.callGatewayMock.mockImplementation(
async (opts: { method?: string; params?: { model?: string } }) => {
if (opts.method === "sessions.patch" && opts.params?.model === "bad-model") {
throw new Error("invalid model: bad-model");
}
if (opts.method === "agent") {
return { runId: "run-depth" };
}
return {};
},
);
hoisted.updateSessionStoreMock.mockRejectedValueOnce(new Error("invalid model: bad-model"));
const result = await spawnFrom("main", { model: "bad-model" });

View File

@@ -83,18 +83,17 @@ describe("spawnSubagentDirect runtime model persistence", () => {
status: "accepted",
modelApplied: true,
});
expect(updateSessionStoreMock).toHaveBeenCalledTimes(1);
expect(updateSessionStoreMock).toHaveBeenCalledTimes(3);
expectPersistedRuntimeModel({
persistedStore,
sessionKey: /^agent:main:subagent:/,
provider: "openai-codex",
model: "gpt-5.4",
});
expect(pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(1);
expect(operations.indexOf("gateway:sessions.patch")).toBeGreaterThan(-1);
expect(operations.indexOf("store:update")).toBeGreaterThan(
operations.indexOf("gateway:sessions.patch"),
expect(pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(3);
expect(operations.indexOf("store:update")).toBeGreaterThan(-1);
expect(operations.indexOf("gateway:agent")).toBeGreaterThan(
operations.lastIndexOf("store:update"),
);
expect(operations.indexOf("gateway:agent")).toBeGreaterThan(operations.indexOf("store:update"));
});
});

View File

@@ -81,10 +81,10 @@ export function installSessionStoreCaptureMock(
onStore?: (store: SessionStore) => void;
},
) {
const store: SessionStore = {};
updateSessionStoreMock.mockImplementation(
async (_storePath: string, mutator: SessionStoreMutator) => {
params?.operations?.push("store:update");
const store: SessionStore = {};
await mutator(store);
params?.onStore?.(store);
return store;

View File

@@ -121,8 +121,8 @@ describe("spawnSubagentDirect seam flow", () => {
expect(result.childSessionKey).toMatch(/^agent:main:subagent:/);
const childSessionKey = result.childSessionKey as string;
expect(hoisted.pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(1);
expect(hoisted.updateSessionStoreMock).toHaveBeenCalledTimes(1);
expect(hoisted.pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(3);
expect(hoisted.updateSessionStoreMock).toHaveBeenCalledTimes(3);
expect(hoisted.registerSubagentRunMock).toHaveBeenCalledWith(
expect.objectContaining({
runId: "run-1",
@@ -156,11 +156,10 @@ describe("spawnSubagentDirect seam flow", () => {
provider: "openai-codex",
model: "gpt-5.4",
});
expect(operations.indexOf("gateway:sessions.patch")).toBeGreaterThan(-1);
expect(operations.indexOf("store:update")).toBeGreaterThan(
operations.indexOf("gateway:sessions.patch"),
expect(operations.indexOf("store:update")).toBeGreaterThan(-1);
expect(operations.indexOf("gateway:agent")).toBeGreaterThan(
operations.lastIndexOf("store:update"),
);
expect(operations.indexOf("gateway:agent")).toBeGreaterThan(operations.indexOf("store:update"));
expect(hoisted.callGatewayMock).toHaveBeenCalledWith(
expect.objectContaining({
method: "agent",
@@ -289,16 +288,9 @@ describe("spawnSubagentDirect seam flow", () => {
});
});
it("returns an error when the initial model patch is rejected", async () => {
it("returns an error when the initial child session patch is rejected", async () => {
hoisted.callGatewayMock.mockImplementation(
async (request: { method?: string; params?: unknown }) => {
if (request.method === "sessions.patch") {
const model = (request.params as { model?: unknown } | undefined)?.model;
if (model === "bad-model") {
throw new Error("invalid model: bad-model");
}
return { ok: true };
}
if (request.method === "agent") {
return { runId: "run-1", status: "accepted", acceptedAt: 1000 };
}
@@ -308,6 +300,7 @@ describe("spawnSubagentDirect seam flow", () => {
return {};
},
);
hoisted.updateSessionStoreMock.mockRejectedValueOnce(new Error("invalid model: bad-model"));
const result = await spawnSubagentDirect(
{

View File

@@ -107,6 +107,9 @@ const defaultSubagentSpawnDeps: SubagentSpawnDeps = {
};
let subagentSpawnDeps: SubagentSpawnDeps = defaultSubagentSpawnDeps;
// Request timeout for gateway control calls made around a spawn, such as the
// best-effort `sessions.delete` cleanup requests.
const SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS = 60_000;
// Lower bound for the child `agent` gateway request timeout; also the value used
// when no positive, finite run timeout is supplied.
const DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS = 60_000;
// Upper bound for the child `agent` gateway request timeout, however long the
// requested run timeout is.
const MAX_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS = 300_000;
export type SpawnSubagentParams = {
task: string;
@@ -199,6 +202,53 @@ function readGatewayRunId(response: Awaited<ReturnType<typeof callGateway>>): st
return typeof runId === "string" && runId ? runId : undefined;
}
/**
 * Choose the gateway request timeout used when starting a child agent run.
 *
 * A missing, non-finite, or non-positive run timeout (including one that floors
 * to 0ms) yields the default. Otherwise the run timeout in whole milliseconds
 * plus 5s of slack is used, clamped between the default and the maximum.
 *
 * @param runTimeoutSeconds Requested run timeout in seconds.
 * @returns Gateway request timeout in milliseconds.
 */
function resolveSubagentAgentGatewayTimeoutMs(runTimeoutSeconds: number): number {
  if (!Number.isFinite(runTimeoutSeconds) || runTimeoutSeconds <= 0) {
    return DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS;
  }
  const wholeRunTimeoutMs = Math.floor(runTimeoutSeconds * 1000);
  if (wholeRunTimeoutMs <= 0) {
    // Sub-millisecond run timeouts floor to zero and are treated as "no timeout".
    return DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS;
  }
  const withSlackMs = wholeRunTimeoutMs + 5_000;
  const atLeastDefaultMs = Math.max(DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS, withSlackMs);
  return Math.min(MAX_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS, atLeastDefaultMs);
}
/**
 * Translate a loosely typed child-session patch into session entry fields.
 *
 * Only recognised, well-formed values are copied; anything else is dropped:
 * - `spawnDepth`: finite number >= 0, floored to an integer.
 * - `subagentRole`: "orchestrator" or "leaf".
 * - `subagentControlScope`: "children" or "none".
 * - `spawnedBy`, `spawnedWorkspaceDir`, `thinkingLevel`: non-blank strings, trimmed.
 * - `model`: non-blank string run through `splitModelRef`; sets `model` and,
 *   when a provider part is returned, `modelProvider`.
 *
 * @param patch Raw patch values keyed by field name.
 * @returns The subset of session entry fields that passed validation.
 */
function buildDirectChildSessionPatch(patch: Record<string, unknown>): Partial<SessionEntry> {
  // Trimmed text for non-blank strings, otherwise undefined.
  const nonBlank = (value: unknown): string | undefined => {
    if (typeof value !== "string") {
      return undefined;
    }
    const text = value.trim();
    return text ? text : undefined;
  };

  const result: Partial<SessionEntry> = {};
  const { spawnDepth: depth, subagentRole: role, subagentControlScope: scope } = patch;

  if (typeof depth === "number" && Number.isFinite(depth) && depth >= 0) {
    result.spawnDepth = Math.floor(depth);
  }
  if (role === "orchestrator" || role === "leaf") {
    result.subagentRole = role;
  }
  if (scope === "children" || scope === "none") {
    result.subagentControlScope = scope;
  }

  const parentKey = nonBlank(patch.spawnedBy);
  if (parentKey) {
    result.spawnedBy = parentKey;
  }
  const workspaceDir = nonBlank(patch.spawnedWorkspaceDir);
  if (workspaceDir) {
    result.spawnedWorkspaceDir = workspaceDir;
  }
  const thinking = nonBlank(patch.thinkingLevel);
  if (thinking) {
    result.thinkingLevel = thinking;
  }

  const modelRef = nonBlank(patch.model);
  if (modelRef) {
    const parsed = splitModelRef(modelRef);
    if (parsed.model) {
      result.model = parsed.model;
      if (parsed.provider) {
        result.modelProvider = parsed.provider;
      }
    }
  }
  return result;
}
function loadSubagentConfig() {
return subagentSpawnDeps.loadConfig();
}
@@ -430,7 +480,7 @@ async function cleanupProvisionalSession(
emitLifecycleHooks: options?.emitLifecycleHooks === true,
deleteTranscript: options?.deleteTranscript === true,
},
timeoutMs: 10_000,
timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
});
} catch {
// Best-effort cleanup only.
@@ -752,14 +802,25 @@ export async function spawnSubagentDirect(
const { resolvedModel, thinkingOverride } = plan;
const patchChildSession = async (patch: Record<string, unknown>): Promise<string | undefined> => {
try {
await callSubagentGateway({
method: "sessions.patch",
params: { key: childSessionKey, ...patch },
timeoutMs: 10_000,
const target = resolveGatewaySessionStoreTarget({
cfg,
key: childSessionKey,
});
await updateSubagentSessionStore(target.storePath, (store) => {
pruneLegacyStoreKeys({
store,
canonicalKey: target.canonicalKey,
candidates: target.storeKeys,
});
store[target.canonicalKey] = mergeSessionEntry(
store[target.canonicalKey],
buildDirectChildSessionPatch(patch),
);
});
return undefined;
} catch (err) {
return err instanceof Error ? err.message : typeof err === "string" ? err : "error";
const message = err instanceof Error ? err.message : typeof err === "string" ? err : "error";
return `child session patch failed: ${message}`;
}
};
@@ -808,7 +869,7 @@ export async function spawnSubagentDirect(
await callSubagentGateway({
method: "sessions.delete",
params: { key: childSessionKey, emitLifecycleHooks: false },
timeoutMs: 10_000,
timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
});
} catch {
// Best-effort cleanup only.
@@ -841,7 +902,7 @@ export async function spawnSubagentDirect(
await callSubagentGateway({
method: "sessions.delete",
params: { key: childSessionKey, emitLifecycleHooks: false },
timeoutMs: 10_000,
timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
});
} catch {
// Best-effort cleanup only.
@@ -1019,7 +1080,7 @@ export async function spawnSubagentDirect(
: {}),
...publicSpawnedMetadata,
},
timeoutMs: 10_000,
timeoutMs: resolveSubagentAgentGatewayTimeoutMs(runTimeoutSeconds),
});
const runId = readGatewayRunId(response);
if (runId) {
@@ -1074,7 +1135,7 @@ export async function spawnSubagentDirect(
deleteTranscript: true,
emitLifecycleHooks,
},
timeoutMs: 10_000,
timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
});
} catch {
// Best-effort only.
@@ -1125,7 +1186,7 @@ export async function spawnSubagentDirect(
deleteTranscript: true,
emitLifecycleHooks: threadBindingReady,
},
timeoutMs: 10_000,
timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
});
} catch {
// Best-effort cleanup only.

View File

@@ -34,6 +34,8 @@ export const EXPECTED_CODEX_MODELS_COMMAND_TEXT = [
"Available model overrides:",
"Available model overrides exposed in this session",
"Available model overrides here:",
"Available model overrides listed in this session:",
"Available model overrides shown in this session:",
"Available model overrides in this session:",
"Available agent models:",
"Visible options in this session:",
@@ -132,6 +134,8 @@ export function isExpectedCodexModelsCommandText(text: string): boolean {
const mentionsVisibleOptions =
normalized.includes("visible options in this session:") ||
normalized.includes("visible options:") ||
normalized.includes("available model overrides listed in this session:") ||
normalized.includes("available model overrides shown in this session:") ||
normalized.includes("available here:") ||
normalized.includes("available agent ids in this session:");
const mentionsCurrentActiveModel =

View File

@@ -3,10 +3,13 @@ import fs from "node:fs/promises";
import { createServer } from "node:net";
import os from "node:os";
import path from "node:path";
import { setTimeout as delay } from "node:timers/promises";
import { describe, expect, it } from "vitest";
import { isLiveTestEnabled } from "../agents/live-test-helpers.js";
import type { OpenClawConfig } from "../config/config.js";
import type { ContextEngine } from "../context-engine/types.js";
import { isTruthyEnvValue } from "../infra/env.js";
import type { CallGatewayOptions } from "./call.js";
import type { GatewayClient } from "./client.js";
import {
connectTestGatewayClient,
@@ -34,9 +37,18 @@ const CODEX_HARNESS_IMAGE_PROBE = isTruthyEnvValue(
process.env.OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE,
);
// Opt-in probe toggles for the live Codex harness test. Each one is read once at
// module load from its own OPENCLAW_LIVE_CODEX_HARNESS_* environment variable.
// NOTE(review): the exact set of values isTruthyEnvValue accepts is defined in
// ../infra/env.js and is not visible here.
const CODEX_HARNESS_MCP_PROBE = isTruthyEnvValue(process.env.OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE);
// Enables the subagent spawn probe.
const CODEX_HARNESS_SUBAGENT_PROBE = isTruthyEnvValue(
process.env.OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE,
);
// Enables the guardian probe.
const CODEX_HARNESS_GUARDIAN_PROBE = isTruthyEnvValue(
process.env.OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE,
);
// Set only when the subagent probe is enabled, none of the image/MCP/guardian
// probes are enabled, and OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY is not the
// literal string "0". An unset variable therefore leaves subagent-only mode on;
// only an explicit "0" opts out.
const CODEX_HARNESS_SUBAGENT_ONLY =
CODEX_HARNESS_SUBAGENT_PROBE &&
!CODEX_HARNESS_IMAGE_PROBE &&
!CODEX_HARNESS_MCP_PROBE &&
!CODEX_HARNESS_GUARDIAN_PROBE &&
process.env.OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY !== "0";
// NOTE(review): presumably turns missing guardian events into a hard failure;
// the consumer of this flag is outside the visible chunk — confirm.
const CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS = isTruthyEnvValue(
process.env.OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS,
);
@@ -79,6 +91,10 @@ function isCodexAccountTokenError(error: unknown): boolean {
return error instanceof Error && error.message.includes("Failed to extract accountId from token");
}
/**
 * Narrows an unknown value to a string-keyed record.
 *
 * @param value - Any value, typically an untyped gateway response or list entry.
 * @returns The same reference when `value` is a non-null object (arrays
 *   included), otherwise `undefined`.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (typeof value !== "object" || value === null) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
async function subscribeCodexLiveDebugEvents(sessionKey: string): Promise<() => void> {
if (!CODEX_HARNESS_DEBUG) {
return () => undefined;
@@ -498,6 +514,172 @@ async function verifyCodexCronMcpProbe(params: {
}
}
/**
 * Fetches the session row of one spawned child session from the gateway.
 *
 * Issues a `sessions.list` request filtered by `spawnedBy` (at most 20 rows,
 * 10 second timeout) and picks the entry whose `key` equals the child key.
 *
 * @param params.childSessionKey - Session key of the child row to find.
 * @param params.client - Gateway client used to send the request.
 * @param params.parentSessionKey - Session key passed as the `spawnedBy` filter.
 * @returns The matching row, or `undefined` when the response carries no
 *   `sessions` array or no entry has the requested key.
 */
async function readSpawnedChildRow(params: {
  childSessionKey: string;
  client: GatewayClient;
  parentSessionKey: string;
}): Promise<Record<string, unknown> | undefined> {
  const { childSessionKey, client, parentSessionKey } = params;
  const response = await client.request(
    "sessions.list",
    { spawnedBy: parentSessionKey, includeLastMessage: true, limit: 20 },
    { timeoutMs: 10_000 },
  );
  const rows = asRecord(response)?.sessions;
  if (!Array.isArray(rows)) {
    return undefined;
  }
  for (const row of rows) {
    // Non-object entries normalize to undefined and can never match the key.
    const candidate = asRecord(row);
    if (candidate?.key === childSessionKey) {
      return candidate;
    }
  }
  return undefined;
}
/**
 * Polls until a spawned subagent is visible both as a session row and as a
 * Codex app-server lifecycle event.
 *
 * Every two seconds, until `min(CODEX_HARNESS_REQUEST_TIMEOUT_MS, 30s)` has
 * elapsed, the child row is re-read from the gateway. The wait succeeds once
 * the row exists and `params.events` contains a `codex_app_server.lifecycle`
 * event for the child session. Errors from the row lookup are remembered and
 * retried rather than aborting the wait.
 *
 * @param params.childSessionKey - Session key of the spawned subagent.
 * @param params.client - Gateway client used for the row lookup.
 * @param params.events - Live list of captured agent events; it is re-inspected
 *   on every poll, so the caller may keep appending to it.
 * @param params.parentSessionKey - Session key of the spawning parent.
 * @returns The child's session row once both conditions hold.
 * @throws Error describing the last row, all captured events and the last
 *   lookup error when the deadline passes first.
 */
async function waitForCodexSubagentStarted(params: {
  childSessionKey: string;
  client: GatewayClient;
  events: CapturedAgentEvent[];
  parentSessionKey: string;
}): Promise<Record<string, unknown> | undefined> {
  const { childSessionKey, client, parentSessionKey } = params;
  const sawChildLifecycleEvent = (): boolean =>
    params.events.some(
      (captured) =>
        captured.sessionKey === childSessionKey &&
        captured.stream === "codex_app_server.lifecycle",
    );

  const giveUpAt = Date.now() + Math.min(CODEX_HARNESS_REQUEST_TIMEOUT_MS, 30_000);
  let lastRow: Record<string, unknown> | undefined;
  let lastError: unknown;

  while (Date.now() < giveUpAt) {
    try {
      lastRow = await readSpawnedChildRow({ childSessionKey, client, parentSessionKey });
      if (lastRow && sawChildLifecycleEvent()) {
        return lastRow;
      }
    } catch (error) {
      // Keep polling; the most recent failure is reported if the wait times out.
      lastError = error;
    }
    await delay(2_000);
  }

  const lastErrorText = lastError instanceof Error ? lastError.message : String(lastError);
  const failureLines = [
    `subagent ${childSessionKey} did not start through the Codex app-server harness`,
    `lastRow=${JSON.stringify(lastRow)}`,
    `events=${JSON.stringify(params.events)}`,
    `lastError=${lastErrorText}`,
  ];
  throw new Error(failureLines.join("\n"));
}
/**
 * Live probe: spawns a subagent from the given parent session and verifies it
 * starts through the Codex app-server harness.
 *
 * Steps:
 * 1. Capture every agent event whose stream starts with `codex_app_server.`.
 * 2. Override the subagent-spawn test deps so gateway calls go through the
 *    already-connected test client and context resolution yields a no-op engine.
 * 3. Spawn the subagent and require an `accepted` result with a child session
 *    key containing `:subagent:`.
 * 4. Wait until the child shows up as a session row and a lifecycle event.
 *
 * The task text embeds a unique token, but this function does not assert on
 * the subagent's reply; only the start-up path is checked.
 *
 * Cleanup is nested so each step runs independently: the dependency override
 * is reset whenever the spawn module was loaded, and the event listener is
 * always removed, even when the module import or the reset itself throws.
 *
 * @param params.client - Connected gateway client used for all gateway calls.
 * @param params.sessionKey - Parent session key the subagent is spawned from.
 * @throws Error when the spawn is not accepted, no child session key is
 *   returned, or the child does not start before the wait deadline.
 */
async function verifyCodexSubagentProbe(params: {
  client: GatewayClient;
  sessionKey: string;
}): Promise<void> {
  const runId = randomUUID();
  const expectedToken = `CODEX-SUBAGENT-${runId.slice(0, 6).toUpperCase()}`;
  const events: CapturedAgentEvent[] = [];
  const { onAgentEvent } = await import("../infra/agent-events.js");
  const unsubscribe = onAgentEvent((event) => {
    if (!event.stream.startsWith("codex_app_server.")) {
      return;
    }
    events.push({
      stream: event.stream,
      sessionKey: event.sessionKey,
      data: event.data,
    });
  });
  try {
    const { __testing: subagentSpawnTesting, spawnSubagentDirect } =
      await import("../agents/subagent-spawn.js");
    try {
      const noOpContextEngine: ContextEngine = {
        info: { id: "codex-harness-subagent-smoke", name: "Codex harness subagent smoke" },
        ingest: async () => ({ ingested: false }),
        assemble: async () => ({ messages: [], estimatedTokens: 0 }),
        compact: async () => ({ ok: true, compacted: false }),
      };
      // Per-call record of gateway traffic, included in the error when the spawn fails.
      const gatewayTrace: Array<{
        durationMs: number;
        error?: string;
        method: string;
        status: "error" | "ok";
        timeoutMs?: number;
      }> = [];
      subagentSpawnTesting.setDepsForTest({
        resolveContextEngine: async () => noOpContextEngine,
        callGateway: async <T = Record<string, unknown>>(opts: CallGatewayOptions): Promise<T> => {
          const startedAt = Date.now();
          try {
            const result = await params.client.request(opts.method, opts.params, {
              // "agent" calls never wait for the final response; the probe
              // observes progress through events and the session row instead.
              expectFinal: opts.method === "agent" ? false : opts.expectFinal,
              timeoutMs: opts.timeoutMs,
            });
            gatewayTrace.push({
              durationMs: Date.now() - startedAt,
              method: opts.method,
              status: "ok",
              timeoutMs: opts.timeoutMs,
            });
            return result as T;
          } catch (err) {
            gatewayTrace.push({
              durationMs: Date.now() - startedAt,
              error: err instanceof Error ? err.message : String(err),
              method: opts.method,
              status: "error",
              timeoutMs: opts.timeoutMs,
            });
            throw err;
          }
        },
      });
      const spawnResult = await spawnSubagentDirect(
        {
          task: `Reply exactly ${expectedToken} and nothing else.`,
          agentId: "dev",
          thinking: "low",
          mode: "run",
          cleanup: "keep",
          context: "isolated",
          expectsCompletionMessage: false,
          runTimeoutSeconds: CODEX_HARNESS_AGENT_TIMEOUT_SECONDS,
        },
        {
          agentSessionKey: params.sessionKey,
        },
      );
      if (spawnResult.status !== "accepted") {
        throw new Error(
          `Codex subagent spawn failed: ${JSON.stringify(spawnResult)} trace=${JSON.stringify(gatewayTrace)}`,
        );
      }
      const childSessionKey = spawnResult.childSessionKey;
      if (!childSessionKey?.includes(":subagent:")) {
        throw new Error(
          `subagent spawn did not return a child session key: ${JSON.stringify(spawnResult)}`,
        );
      }
      const childRow = await waitForCodexSubagentStarted({
        childSessionKey,
        client: params.client,
        events,
        parentSessionKey: params.sessionKey,
      });
      expect(childRow?.key).toBe(childSessionKey);
    } finally {
      // Reset the override using the module binding that is already loaded;
      // no second dynamic import that could reject during cleanup.
      subagentSpawnTesting.setDepsForTest();
    }
  } finally {
    // Always detach the listener, even if the import or the reset above threw.
    unsubscribe();
  }
}
describeLive("gateway live (Codex harness)", () => {
it(
"runs gateway agent turns through the plugin-owned Codex app-server harness",
@@ -569,6 +751,16 @@ describeLive("gateway live (Codex harness)", () => {
try {
try {
const sessionKey = "agent:dev:live-codex-harness";
if (CODEX_HARNESS_SUBAGENT_PROBE) {
logCodexLiveStep("subagent-probe:start", { sessionKey });
await verifyCodexSubagentProbe({ client, sessionKey });
logCodexLiveStep("subagent-probe:done");
if (CODEX_HARNESS_SUBAGENT_ONLY) {
return;
}
}
const unsubscribeDebugEvents = await subscribeCodexLiveDebugEvents(sessionKey);
const firstNonce = randomBytes(3).toString("hex").toUpperCase();
try {
@@ -609,6 +801,7 @@ describeLive("gateway live (Codex harness)", () => {
"model `codex/",
"session `agent:dev:live-codex-harness`",
"Model/status card shown above",
"Status shown above.",
],
});
logCodexLiveStep("codex-status-command", { statusText });