test: cover codex app-server subagents

2026-05-06 15:18:58 +00:00 · 2026-04-26 03:45:00 +01:00
parent c149de7750
commit 0ddbae171d
14 changed files with 448 additions and 83 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -85,21 +85,59 @@ Docs: https://docs.openclaw.ai
  and show daemon state separately when available, so `gateway.tailscale.mode:
 "off"` no longer reads like the Tailscale daemon is stopped. Fixes #71790.
  Thanks @pesvobodak.
- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables itself for the current Gateway process after repeated failed restarts while the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux, @FiredMosquito831, and @spikefcz.
- Gateway/Fly.io: seed Control UI allowed origins from the actual runtime bind and port so CLI-driven non-loopback starts do not crash before config exists. Fixes #71823.
- Gateway/proxy: bootstrap env proxy dispatching from direct Gateway startup so provider and plugin network requests honor `HTTPS_PROXY`/`HTTP_PROXY` before the first embedded agent attempt runs. (#71833) Thanks @mjamiv.
- Models/LM Studio: preserve `@iq*` quant suffixes in model refs and provider matching so `/model lmstudio/...@iq3_xxs` keeps the exact LM Studio variant. Fixes #71474. (#71486) Thanks @Bartok9, @XinwuC, and @Sanjays2402.
- Matrix/cron: preserve the live Matrix delivery target when creating implicit announce reminder jobs so mixed-case room IDs are not reconstructed from lowercased session keys. Fixes #71798.
- Feishu: accept Schema 2.0 card action callbacks that report `context.open_chat_id` instead of legacy `context.chat_id`, so button callbacks no longer drop as malformed. Fixes #71670. Thanks @eddy1068.
- Feishu: keep synthetic card-action and bot-menu ids out of platform reply targets, using the real card callback message id when Feishu provides one and plain-sending otherwise. Fixes #71673. Thanks @eddy1068.
- Plugins/QQ Bot: prefer an installed QQ Bot plugin that declares it replaces the bundled `qqbot` channel, preventing duplicate `qqbot_channel_api` and `qqbot_remind` tool registration noise. Fixes #63102.
- Browser automation: keep stable tab ids and labels attached when Chromium replaces the raw target after form submissions or other action-triggered navigations, and return the replacement `targetId` from `/act` when the match is provable. Fixes #46137.
- QQ Bot: make `qqbot_remind` schedule, list, and remove Gateway cron jobs directly for owner-authorized senders instead of returning `cronParams` and relying on a follow-up generic `cron` tool call. Fixes #70865. (#70937) Thanks @GaosCode.
- Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled `acpx`. Thanks @vincentkoc.
- Media delivery: avoid sending generated image attachments twice when the assistant reply already includes explicit `MEDIA:` lines for the same turn, and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash.
- Codex harness: ignore retryable app-server error notifications after Codex recovers, and preserve the real nested error message for terminal app-server failures instead of replacing it with a generic failure. Thanks @pashpashpash.
- Agents/subagents: keep queued subagent announces session-only when the requester has no external channel target, avoiding ambiguous multi-channel delivery failures. Fixes #59201. Thanks @larrylhollan.
- Image understanding: preserve configured provider-prefixed vision model metadata when callers request the model without the provider prefix, so custom image models keep their `input: ["text", "image"]` capability. Fixes #33185. Thanks @Kobe9312 and @vincentkoc.
+- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when
+  the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables
+  itself for the current Gateway process after repeated failed restarts while
+  the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux,
+  @FiredMosquito831, and @spikefcz.
+- Gateway/Fly.io: seed Control UI allowed origins from the actual runtime
+  bind and port so CLI-driven non-loopback starts do not crash before config
+  exists. Fixes #71823.
+- Gateway/proxy: bootstrap env proxy dispatching from direct Gateway startup
+  so provider and plugin network requests honor `HTTPS_PROXY`/`HTTP_PROXY`
+  before the first embedded agent attempt runs. (#71833) Thanks @mjamiv.
+- Models/LM Studio: preserve `@iq*` quant suffixes in model refs and provider
+  matching so `/model lmstudio/...@iq3_xxs` keeps the exact LM Studio variant.
+  Fixes #71474. (#71486) Thanks @Bartok9, @XinwuC, and @Sanjays2402.
+- Matrix/cron: preserve the live Matrix delivery target when creating implicit
+  announce reminder jobs so mixed-case room IDs are not reconstructed from
+  lowercased session keys. Fixes #71798.
+- Feishu: accept Schema 2.0 card action callbacks that report
+  `context.open_chat_id` instead of legacy `context.chat_id`, so button
+  callbacks no longer drop as malformed. Fixes #71670. Thanks @eddy1068.
+- Feishu: keep synthetic card-action and bot-menu ids out of platform reply
+  targets, using the real card callback message id when Feishu provides one and
+  plain-sending otherwise. Fixes #71673. Thanks @eddy1068.
+- Plugins/QQ Bot: prefer an installed QQ Bot plugin that declares it replaces
+  the bundled `qqbot` channel, preventing duplicate `qqbot_channel_api` and
+  `qqbot_remind` tool registration noise. Fixes #63102.
+- Browser automation: keep stable tab ids and labels attached when Chromium
+  replaces the raw target after form submissions or other action-triggered
+  navigations, and return the replacement `targetId` from `/act` when the match
+  is provable. Fixes #46137.
+- QQ Bot: make `qqbot_remind` schedule, list, and remove Gateway cron jobs
+  directly for owner-authorized senders instead of returning `cronParams` and
+  relying on a follow-up generic `cron` tool call. Fixes #70865. (#70937)
+  Thanks @GaosCode.
+- Agents/ACP: hide `sessions_spawn` ACP runtime options unless an ACP backend is
+  loaded, and make `/acp doctor` call out `plugins.allow` blocking bundled
+  `acpx`. Thanks @vincentkoc.
+- Media delivery: avoid sending generated image attachments twice when the
+  assistant reply already includes explicit `MEDIA:` lines for the same turn,
+  and reject unsafe remote `MEDIA:` URLs before delivery. Thanks @pashpashpash.
+- Codex harness: ignore retryable app-server error notifications after Codex
+  recovers, and preserve the real nested error message for terminal app-server
+  failures instead of replacing it with a generic failure. Thanks @pashpashpash.
+- Agents/Codex: prepare native Codex sub-agent session metadata without a
+  nested Gateway session patch and add a focused Docker smoke for the app-server
+  sub-agent path. Thanks @vincentkoc.
+- Agents/subagents: keep queued subagent announces session-only when the
+  requester has no external channel target, avoiding ambiguous multi-channel
+  delivery failures. Fixes #59201. Thanks @larrylhollan.
+- Image understanding: preserve configured provider-prefixed vision model
+  metadata when callers request the model without the provider prefix, so custom
+  image models keep their `input: ["text", "image"]` capability. Fixes #33185.
+  Thanks @Kobe9312 and @vincentkoc.
 - Plugins/install: restore the previous plugin index records if a concurrent config write conflict interrupts install, update, or uninstall metadata commits. Thanks @shakkernerd.
 - Plugins/update: restore previous plugin index records if core update or channel setup hits a concurrent config write conflict after plugin metadata changes. Thanks @shakkernerd.
 - Plugins/onboarding: defer channel/provider plugin install records until the owning config write commits, keeping setup failures from advancing the plugin index ahead of `openclaw.json`. Thanks @shakkernerd.
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -55,6 +55,15 @@ When debugging real providers/models (requires real creds):
    Slack DM with `/codex bind`, exercises `/codex fast` and
    `/codex permissions`, then verifies a plain reply and an image attachment
    route through the native plugin binding instead of ACP.
+- Codex app-server harness smoke: `pnpm test:docker:live-codex-harness`
+  - Runs gateway agent turns through the plugin-owned Codex app-server harness,
+    verifies `/codex status` and `/codex models`, and by default exercises image,
+    cron MCP, sub-agent, and Guardian probes. Disable the sub-agent probe with
+    `OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE=0` when isolating other Codex
+    app-server failures. For a focused sub-agent check, disable the other probes:
+    `OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE=0 OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE=1 pnpm test:docker:live-codex-harness`.
+    That focused run exits right after the sub-agent probe unless
+    `OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY=0` is set.
 - Crestodian rescue command smoke: `pnpm test:live:crestodian-rescue-channel`
  - Opt-in belt-and-suspenders check for the message-channel rescue command
    surface. It exercises `/crestodian status`, queues a persistent model
--- a/scripts/test-live-codex-harness-docker.sh
+++ b/scripts/test-live-codex-harness-docker.sh
@@ -203,6 +203,8 @@ echo "==> Run Codex harness live test in Docker"
 echo "==> Model: ${OPENCLAW_LIVE_CODEX_HARNESS_MODEL:-codex/gpt-5.5}"
 echo "==> Image probe: ${OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE:-1}"
 echo "==> MCP probe: ${OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE:-1}"
+echo "==> Subagent probe: ${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE:-1}"
+echo "==> Subagent-only fast path: ${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY:-auto}"
 echo "==> Guardian probe: ${OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE:-1}"
 echo "==> Auth mode: $CODEX_HARNESS_AUTH_MODE"
 echo "==> Profile file: $PROFILE_STATUS"
@@ -230,6 +232,8 @@ DOCKER_RUN_ARGS=(docker run --rm -t \
  -e OPENCLAW_LIVE_CODEX_HARNESS_MODEL="${OPENCLAW_LIVE_CODEX_HARNESS_MODEL:-codex/gpt-5.5}" \
  -e OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS="${OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS:-1}" \
  -e OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS="${OPENCLAW_LIVE_CODEX_HARNESS_REQUEST_TIMEOUT_MS:-}" \
+  -e OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY="${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY:-}" \
+  -e OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE="${OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE:-1}" \
  -e OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG="${OPENCLAW_LIVE_CODEX_HARNESS_USE_CI_SAFE_CODEX_CONFIG:-1}" \
  -e OPENCLAW_LIVE_CODEX_BIND="${OPENCLAW_LIVE_CODEX_BIND:-}" \
  -e OPENCLAW_LIVE_CODEX_BIND_MODEL="${OPENCLAW_LIVE_CODEX_BIND_MODEL:-}" \
--- a/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts
+++ b/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts
@@ -269,6 +269,32 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
    expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
  });

+  it("gives native child agent startup enough gateway request time", async () => { // pins the timeoutMs sent with the child `agent` gateway request
+    const ctx = setupSessionsSpawnGatewayMock({
+      includeChatHistory: true,
+      agentWaitResult: { status: "ok", startedAt: 1000, endedAt: 2000 },
+    });
+    const tool = await getSessionsSpawnTool({
+      agentSessionKey: "main",
+      agentChannel: "whatsapp",
+    });
+
+    const result = await tool.execute("call-start-timeout", {
+      task: "do thing",
+      runTimeoutSeconds: 120, // 120 s run budget; the final assertion expects this plus 5 s
+    });
+
+    expect(result.details).toMatchObject({
+      status: "accepted",
+      runId: expect.any(String),
+    });
+    const childAgentCall = ctx.calls.find((call) => { // first call in ctx.calls with method "agent" on the "subagent" lane
+      const params = call.params as { lane?: string } | undefined;
+      return call.method === "agent" && params?.lane === "subagent";
+    });
+    expect(childAgentCall?.timeoutMs).toBe(125_000); // 120_000 ms run timeout + 5_000 ms
+  });
+
  it("sessions_spawn retires bundle MCP runtime when run-mode cleanup completes", async () => {
    let resumeAnnounceFlow: ((value: boolean) => void) | undefined;
    let announceFlowStarted: (() => void) | undefined;
--- a/src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts
+++ b/src/agents/openclaw-tools.subagents.sessions-spawn.test-harness.ts
@@ -12,7 +12,7 @@ type CreateSessionsSpawnTool =
 type SubagentRegistryTesting = (typeof import("./subagent-registry.js"))["__testing"];
 type SubagentSpawnTesting = (typeof import("./subagent-spawn.js"))["__testing"];
 export type CreateOpenClawToolsOpts = Parameters<CreateSessionsSpawnTool>[0];
-export type GatewayRequest = { method?: string; params?: unknown };
+export type GatewayRequest = { method?: string; params?: unknown; timeoutMs?: number };
 export type AgentWaitCall = { runId?: string; timeoutMs?: number };
 type SessionsSpawnGatewayMockOptions = {
  includeSessionsList?: boolean;
--- a/src/agents/sessions-spawn-hooks.test.ts
+++ b/src/agents/sessions-spawn-hooks.test.ts
@@ -9,6 +9,7 @@ type GatewayRequest = { method?: string; params?: Record<string, unknown> };
 const hoisted = vi.hoisted(() => ({
  callGatewayMock: vi.fn(),
  configOverride: {} as Record<string, unknown>,
+  updateSessionStoreMock: vi.fn(),
 }));

 const hookRunnerMocks = vi.hoisted(() => ({
@@ -139,6 +140,7 @@ beforeAll(async () => {
  ({ resetSubagentRegistryForTests, spawnSubagentDirect } = await loadSubagentSpawnModuleForTest({
    callGatewayMock: hoisted.callGatewayMock,
    loadConfig: () => hoisted.configOverride,
+    updateSessionStoreMock: hoisted.updateSessionStoreMock,
    hookRunner: {
      hasHooks: (hookName: string) =>
        hookName === "subagent_spawning" ||
@@ -157,6 +159,7 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
  beforeEach(() => {
    resetSubagentRegistryForTests();
    hoisted.callGatewayMock.mockReset();
+    hoisted.updateSessionStoreMock.mockReset();
    hookRunnerMocks.hasSubagentEndedHook = true;
    hookRunnerMocks.runSubagentSpawning.mockClear();
    hookRunnerMocks.runSubagentSpawned.mockClear();
@@ -167,6 +170,16 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
        scope: "per-sender",
      },
    });
+    const store: Record<string, Record<string, unknown>> = {};
+    hoisted.updateSessionStoreMock.mockImplementation(
+      async (_storePath: unknown, mutator: unknown) => {
+        if (typeof mutator !== "function") {
+          throw new Error("missing session store mutator");
+        }
+        await mutator(store);
+        return store;
+      },
+    );
    hoisted.callGatewayMock.mockImplementation(async (opts: unknown) => {
      const request = opts as { method?: string };
      if (request.method === "sessions.patch") {
@@ -398,11 +411,21 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
  });

  it("cleans up the provisional session when lineage patching fails after thread binding", async () => {
+    const store: Record<string, Record<string, unknown>> = {};
+    hoisted.updateSessionStoreMock.mockImplementation(
+      async (_storePath: unknown, mutator: unknown) => {
+        if (typeof mutator !== "function") {
+          throw new Error("missing session store mutator");
+        }
+        await mutator(store);
+        if (Object.values(store).some((entry) => typeof entry.spawnedBy === "string")) {
+          throw new Error("lineage patch failed");
+        }
+        return store;
+      },
+    );
    hoisted.callGatewayMock.mockImplementation(async (opts: unknown) => {
      const request = opts as { method?: string; params?: Record<string, unknown> };
-      if (request.method === "sessions.patch" && typeof request.params?.spawnedBy === "string") {
-        throw new Error("lineage patch failed");
-      }
      if (request.method === "sessions.delete") {
        return { ok: true };
      }
@@ -420,10 +443,8 @@ describe("sessions_spawn subagent lifecycle hooks", () => {
      agentThreadId: "456",
    });

-    expect(result).toMatchObject({
-      status: "error",
-      error: "lineage patch failed",
-    });
+    expect(result.status).toBe("error");
+    expect(result.error).toContain("lineage patch failed");
    expect(hookRunnerMocks.runSubagentSpawned).not.toHaveBeenCalled();
    expect(hookRunnerMocks.runSubagentEnded).not.toHaveBeenCalled();
    const methods = getGatewayMethods();
--- a/src/agents/subagent-spawn.attachments.test.ts
+++ b/src/agents/subagent-spawn.attachments.test.ts
@@ -9,6 +9,7 @@ import {
 } from "./subagent-spawn.test-helpers.js";

 const callGatewayMock = vi.fn();
+const updateSessionStoreMock = vi.fn();

 let configOverride: Record<string, unknown> = {
  ...createSubagentSpawnTestConfig(),
@@ -20,6 +21,7 @@ beforeAll(async () => {
  subagentSpawnModule = await loadSubagentSpawnModuleForTest({
    callGatewayMock,
    loadConfig: () => configOverride,
+    updateSessionStoreMock,
    workspaceDir: workspaceDirOverride || os.tmpdir(),
  });
 });
@@ -92,6 +94,15 @@ describe("spawnSubagentDirect filename validation", () => {
    configOverride = createSubagentSpawnTestConfig(workspaceDirOverride);
    subagentSpawnModule.resetSubagentRegistryForTests();
    callGatewayMock.mockClear();
+    updateSessionStoreMock.mockReset();
+    const store: Record<string, Record<string, unknown>> = {};
+    updateSessionStoreMock.mockImplementation(async (_storePath: unknown, mutator: unknown) => {
+      if (typeof mutator !== "function") {
+        throw new Error("missing session store mutator");
+      }
+      await mutator(store);
+      return store;
+    });
    setupAcceptedSubagentGatewayMock(callGatewayMock);
  });

@@ -170,12 +181,20 @@ describe("spawnSubagentDirect filename validation", () => {

  it("removes materialized attachments when lineage patching fails", async () => {
    const calls: Array<{ method?: string; params?: Record<string, unknown> }> = [];
+    const store: Record<string, Record<string, unknown>> = {};
+    updateSessionStoreMock.mockImplementation(async (_storePath: unknown, mutator: unknown) => {
+      if (typeof mutator !== "function") {
+        throw new Error("missing session store mutator");
+      }
+      await mutator(store);
+      if (Object.values(store).some((entry) => typeof entry.spawnedBy === "string")) {
+        throw new Error("lineage patch failed");
+      }
+      return store;
+    });
    callGatewayMock.mockImplementation(async (opts: unknown) => {
      const request = opts as { method?: string; params?: Record<string, unknown> };
      calls.push(request);
-      if (request.method === "sessions.patch" && typeof request.params?.spawnedBy === "string") {
-        throw new Error("lineage patch failed");
-      }
      if (request.method === "sessions.delete") {
        return { ok: true };
      }
@@ -191,10 +210,8 @@ describe("spawnSubagentDirect filename validation", () => {
      ctx,
    );

-    expect(result).toMatchObject({
-      status: "error",
-      error: "lineage patch failed",
-    });
+    expect(result.status).toBe("error");
+    expect(result.error).toContain("lineage patch failed");
    const attachmentsRoot = path.join(workspaceDirOverride, ".openclaw", "attachments");
    const retainedDirs = fs.existsSync(attachmentsRoot)
      ? fs.readdirSync(attachmentsRoot).filter((entry) => !entry.startsWith("."))
--- a/src/agents/subagent-spawn.depth-limits.test.ts
+++ b/src/agents/subagent-spawn.depth-limits.test.ts
@@ -1,6 +1,7 @@
 import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
 import {
  createSubagentSpawnTestConfig,
+  installSessionStoreCaptureMock,
  loadSubagentSpawnModuleForTest,
  setupAcceptedSubagentGatewayMock,
 } from "./subagent-spawn.test-helpers.js";
@@ -10,10 +11,12 @@ const hoisted = vi.hoisted(() => ({
  callGatewayMock: vi.fn(),
  configOverride: {} as Record<string, unknown>,
  depthBySession: new Map<string, number>(),
+  updateSessionStoreMock: vi.fn(),
  registerSubagentRunMock: vi.fn(),
 }));

 let spawnSubagentDirect: typeof import("./subagent-spawn.js").spawnSubagentDirect;
+let persistedStore: Record<string, Record<string, unknown>> | undefined;

 function createDepthLimitConfig(subagents?: Record<string, unknown>) {
  return createSubagentSpawnTestConfig("/tmp/workspace-main", {
@@ -48,6 +51,7 @@ describe("subagent spawn depth + child limits", () => {
      callGatewayMock: hoisted.callGatewayMock,
      loadConfig: () => hoisted.configOverride,
      registerSubagentRunMock: hoisted.registerSubagentRunMock,
+      updateSessionStoreMock: hoisted.updateSessionStoreMock,
      getSubagentDepthFromSessionStore: (sessionKey) => hoisted.depthBySession.get(sessionKey) ?? 0,
      countActiveRunsForSession: (sessionKey) =>
        hoisted.activeChildrenBySession.get(sessionKey) ?? 0,
@@ -60,6 +64,13 @@ describe("subagent spawn depth + child limits", () => {
    hoisted.depthBySession.clear();
    hoisted.callGatewayMock.mockClear();
    hoisted.registerSubagentRunMock.mockClear();
+    hoisted.updateSessionStoreMock.mockReset();
+    persistedStore = undefined;
+    installSessionStoreCaptureMock(hoisted.updateSessionStoreMock, {
+      onStore: (store) => {
+        persistedStore = store;
+      },
+    });
    hoisted.configOverride = createDepthLimitConfig();
    setupAcceptedSubagentGatewayMock(hoisted.callGatewayMock);
  });
@@ -87,23 +98,14 @@ describe("subagent spawn depth + child limits", () => {
      runId: "run-1",
    });

-    const calls = hoisted.callGatewayMock.mock.calls.map(
-      (call) => call[0] as { method?: string; params?: Record<string, unknown> },
-    );
-    const spawnedByPatch = calls.find(
-      (entry) =>
-        entry.method === "sessions.patch" &&
-        entry.params?.spawnedBy === "agent:main:subagent:parent",
-    );
-    expect(spawnedByPatch?.params?.key).toMatch(/^agent:main:subagent:/);
-    expect(typeof spawnedByPatch?.params?.spawnedWorkspaceDir).toBe("string");
-
-    const spawnDepthPatch = calls.find(
-      (entry) => entry.method === "sessions.patch" && entry.params?.spawnDepth === 2,
-    );
-    expect(spawnDepthPatch?.params?.key).toMatch(/^agent:main:subagent:/);
-    expect(spawnDepthPatch?.params?.subagentRole).toBe("leaf");
-    expect(spawnDepthPatch?.params?.subagentControlScope).toBe("none");
+    const childSession = persistedStore?.[result.childSessionKey as string];
+    expect(childSession).toMatchObject({
+      spawnedBy: "agent:main:subagent:parent",
+      spawnDepth: 2,
+      subagentRole: "leaf",
+      subagentControlScope: "none",
+    });
+    expect(typeof childSession?.spawnedWorkspaceDir).toBe("string");
  });

  it("rejects callers when stored spawn depth is already at the configured max", async () => {
@@ -151,19 +153,17 @@ describe("subagent spawn depth + child limits", () => {
    });
  });

-  it("fails spawn when sessions.patch rejects the model", async () => {
+  it("fails spawn when the initial child session patch rejects the model", async () => {
    hoisted.configOverride = createDepthLimitConfig({ maxSpawnDepth: 2 });
    hoisted.callGatewayMock.mockImplementation(
      async (opts: { method?: string; params?: { model?: string } }) => {
-        if (opts.method === "sessions.patch" && opts.params?.model === "bad-model") {
-          throw new Error("invalid model: bad-model");
-        }
        if (opts.method === "agent") {
          return { runId: "run-depth" };
        }
        return {};
      },
    );
+    hoisted.updateSessionStoreMock.mockRejectedValueOnce(new Error("invalid model: bad-model"));

    const result = await spawnFrom("main", { model: "bad-model" });

--- a/src/agents/subagent-spawn.model-session.test.ts
+++ b/src/agents/subagent-spawn.model-session.test.ts
@@ -83,18 +83,17 @@ describe("spawnSubagentDirect runtime model persistence", () => {
      status: "accepted",
      modelApplied: true,
    });
-    expect(updateSessionStoreMock).toHaveBeenCalledTimes(1);
+    expect(updateSessionStoreMock).toHaveBeenCalledTimes(3);
    expectPersistedRuntimeModel({
      persistedStore,
      sessionKey: /^agent:main:subagent:/,
      provider: "openai-codex",
      model: "gpt-5.4",
    });
-    expect(pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(1);
-    expect(operations.indexOf("gateway:sessions.patch")).toBeGreaterThan(-1);
-    expect(operations.indexOf("store:update")).toBeGreaterThan(
-      operations.indexOf("gateway:sessions.patch"),
+    expect(pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(3);
+    expect(operations.indexOf("store:update")).toBeGreaterThan(-1);
+    expect(operations.indexOf("gateway:agent")).toBeGreaterThan(
+      operations.lastIndexOf("store:update"),
    );
-    expect(operations.indexOf("gateway:agent")).toBeGreaterThan(operations.indexOf("store:update"));
  });
 });
--- a/src/agents/subagent-spawn.test-helpers.ts
+++ b/src/agents/subagent-spawn.test-helpers.ts
@@ -81,10 +81,10 @@ export function installSessionStoreCaptureMock(
    onStore?: (store: SessionStore) => void;
  },
 ) {
+  const store: SessionStore = {};
  updateSessionStoreMock.mockImplementation(
    async (_storePath: string, mutator: SessionStoreMutator) => {
      params?.operations?.push("store:update");
-      const store: SessionStore = {};
      await mutator(store);
      params?.onStore?.(store);
      return store;
--- a/src/agents/subagent-spawn.test.ts
+++ b/src/agents/subagent-spawn.test.ts
@@ -121,8 +121,8 @@ describe("spawnSubagentDirect seam flow", () => {
    expect(result.childSessionKey).toMatch(/^agent:main:subagent:/);

    const childSessionKey = result.childSessionKey as string;
-    expect(hoisted.pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(1);
-    expect(hoisted.updateSessionStoreMock).toHaveBeenCalledTimes(1);
+    expect(hoisted.pruneLegacyStoreKeysMock).toHaveBeenCalledTimes(3);
+    expect(hoisted.updateSessionStoreMock).toHaveBeenCalledTimes(3);
    expect(hoisted.registerSubagentRunMock).toHaveBeenCalledWith(
      expect.objectContaining({
        runId: "run-1",
@@ -156,11 +156,10 @@ describe("spawnSubagentDirect seam flow", () => {
      provider: "openai-codex",
      model: "gpt-5.4",
    });
-    expect(operations.indexOf("gateway:sessions.patch")).toBeGreaterThan(-1);
-    expect(operations.indexOf("store:update")).toBeGreaterThan(
-      operations.indexOf("gateway:sessions.patch"),
+    expect(operations.indexOf("store:update")).toBeGreaterThan(-1);
+    expect(operations.indexOf("gateway:agent")).toBeGreaterThan(
+      operations.lastIndexOf("store:update"),
    );
-    expect(operations.indexOf("gateway:agent")).toBeGreaterThan(operations.indexOf("store:update"));
    expect(hoisted.callGatewayMock).toHaveBeenCalledWith(
      expect.objectContaining({
        method: "agent",
@@ -289,16 +288,9 @@ describe("spawnSubagentDirect seam flow", () => {
    });
  });

-  it("returns an error when the initial model patch is rejected", async () => {
+  it("returns an error when the initial child session patch is rejected", async () => {
    hoisted.callGatewayMock.mockImplementation(
      async (request: { method?: string; params?: unknown }) => {
-        if (request.method === "sessions.patch") {
-          const model = (request.params as { model?: unknown } | undefined)?.model;
-          if (model === "bad-model") {
-            throw new Error("invalid model: bad-model");
-          }
-          return { ok: true };
-        }
        if (request.method === "agent") {
          return { runId: "run-1", status: "accepted", acceptedAt: 1000 };
        }
@@ -308,6 +300,7 @@ describe("spawnSubagentDirect seam flow", () => {
        return {};
      },
    );
+    hoisted.updateSessionStoreMock.mockRejectedValueOnce(new Error("invalid model: bad-model"));

    const result = await spawnSubagentDirect(
      {
--- a/src/agents/subagent-spawn.ts
+++ b/src/agents/subagent-spawn.ts
@@ -107,6 +107,9 @@ const defaultSubagentSpawnDeps: SubagentSpawnDeps = {
 };

 let subagentSpawnDeps: SubagentSpawnDeps = defaultSubagentSpawnDeps;
+const SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS = 60_000; // timeoutMs for control requests such as the `sessions.delete` cleanup calls in this file
+const DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS = 60_000; // fallback and lower bound for the child `agent` request timeout
+const MAX_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS = 300_000; // upper bound for the child `agent` request timeout

 export type SpawnSubagentParams = {
  task: string;
@@ -199,6 +202,53 @@ function readGatewayRunId(response: Awaited<ReturnType<typeof callGateway>>): st
  return typeof runId === "string" && runId ? runId : undefined;
 }

+function resolveSubagentAgentGatewayTimeoutMs(runTimeoutSeconds: number): number { // returns the gateway timeoutMs for the child `agent` request, derived from the run timeout in seconds
+  const runTimeoutMs =
+    Number.isFinite(runTimeoutSeconds) && runTimeoutSeconds > 0
+      ? Math.floor(runTimeoutSeconds * 1000) // whole milliseconds
+      : 0; // NaN, Infinity, zero and negative inputs are treated as "no run timeout"
+  if (runTimeoutMs <= 0) { // also reached when a tiny positive value floors to 0 ms
+    return DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS;
+  }
+  return Math.min( // clamp the result to [DEFAULT, MAX]
+    MAX_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS,
+    Math.max(DEFAULT_SUBAGENT_AGENT_GATEWAY_TIMEOUT_MS, runTimeoutMs + 5_000), // run timeout plus 5 s, never below the default
+  );
+}
+
+function buildDirectChildSessionPatch(patch: Record<string, unknown>): Partial<SessionEntry> { // copies only the recognized, validated fields of a loose patch into a partial session entry
+  const entry: Partial<SessionEntry> = {}; // fields that fail validation below are simply left unset
+  const spawnDepth = patch.spawnDepth;
+  if (typeof spawnDepth === "number" && Number.isFinite(spawnDepth) && spawnDepth >= 0) {
+    entry.spawnDepth = Math.floor(spawnDepth); // stored as a non-negative integer
+  }
+  if (patch.subagentRole === "orchestrator" || patch.subagentRole === "leaf") { // any other role value is ignored
+    entry.subagentRole = patch.subagentRole;
+  }
+  if (patch.subagentControlScope === "children" || patch.subagentControlScope === "none") { // any other scope value is ignored
+    entry.subagentControlScope = patch.subagentControlScope;
+  }
+  if (typeof patch.spawnedBy === "string" && patch.spawnedBy.trim()) { // blank or whitespace-only strings are skipped
+    entry.spawnedBy = patch.spawnedBy.trim();
+  }
+  if (typeof patch.spawnedWorkspaceDir === "string" && patch.spawnedWorkspaceDir.trim()) {
+    entry.spawnedWorkspaceDir = patch.spawnedWorkspaceDir.trim();
+  }
+  if (typeof patch.thinkingLevel === "string" && patch.thinkingLevel.trim()) {
+    entry.thinkingLevel = patch.thinkingLevel.trim();
+  }
+  if (typeof patch.model === "string" && patch.model.trim()) {
+    const { provider, model } = splitModelRef(patch.model.trim()); // NOTE(review): presumably splits a "provider/model" ref; confirm against splitModelRef
+    if (model) { // without a model part, neither model nor modelProvider is written
+      entry.model = model;
+      if (provider) { // modelProvider is only set when the ref yields a provider
+        entry.modelProvider = provider;
+      }
+    }
+  }
+  return entry;
+}
+
 function loadSubagentConfig() {
  return subagentSpawnDeps.loadConfig();
 }
@@ -430,7 +480,7 @@ async function cleanupProvisionalSession(
        emitLifecycleHooks: options?.emitLifecycleHooks === true,
        deleteTranscript: options?.deleteTranscript === true,
      },
-      timeoutMs: 10_000,
+      timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
    });
  } catch {
    // Best-effort cleanup only.
@@ -752,14 +802,25 @@ export async function spawnSubagentDirect(
  const { resolvedModel, thinkingOverride } = plan;
  const patchChildSession = async (patch: Record<string, unknown>): Promise<string | undefined> => {
    try {
-      await callSubagentGateway({
-        method: "sessions.patch",
-        params: { key: childSessionKey, ...patch },
-        timeoutMs: 10_000,
+      const target = resolveGatewaySessionStoreTarget({
+        cfg,
+        key: childSessionKey,
+      });
+      await updateSubagentSessionStore(target.storePath, (store) => {
+        pruneLegacyStoreKeys({
+          store,
+          canonicalKey: target.canonicalKey,
+          candidates: target.storeKeys,
+        });
+        store[target.canonicalKey] = mergeSessionEntry(
+          store[target.canonicalKey],
+          buildDirectChildSessionPatch(patch),
+        );
      });
      return undefined;
    } catch (err) {
-      return err instanceof Error ? err.message : typeof err === "string" ? err : "error";
+      const message = err instanceof Error ? err.message : typeof err === "string" ? err : "error";
+      return `child session patch failed: ${message}`;
    }
  };

@@ -808,7 +869,7 @@ export async function spawnSubagentDirect(
        await callSubagentGateway({
          method: "sessions.delete",
          params: { key: childSessionKey, emitLifecycleHooks: false },
-          timeoutMs: 10_000,
+          timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
        });
      } catch {
        // Best-effort cleanup only.
@@ -841,7 +902,7 @@ export async function spawnSubagentDirect(
        await callSubagentGateway({
          method: "sessions.delete",
          params: { key: childSessionKey, emitLifecycleHooks: false },
-          timeoutMs: 10_000,
+          timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
        });
      } catch {
        // Best-effort cleanup only.
@@ -1019,7 +1080,7 @@ export async function spawnSubagentDirect(
          : {}),
        ...publicSpawnedMetadata,
      },
-      timeoutMs: 10_000,
+      timeoutMs: resolveSubagentAgentGatewayTimeoutMs(runTimeoutSeconds),
    });
    const runId = readGatewayRunId(response);
    if (runId) {
@@ -1074,7 +1135,7 @@ export async function spawnSubagentDirect(
          deleteTranscript: true,
          emitLifecycleHooks,
        },
-        timeoutMs: 10_000,
+        timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
      });
    } catch {
      // Best-effort only.
@@ -1125,7 +1186,7 @@ export async function spawnSubagentDirect(
          deleteTranscript: true,
          emitLifecycleHooks: threadBindingReady,
        },
-        timeoutMs: 10_000,
+        timeoutMs: SUBAGENT_CONTROL_GATEWAY_TIMEOUT_MS,
      });
    } catch {
      // Best-effort cleanup only.
--- a/src/gateway/gateway-codex-harness.live-helpers.ts
+++ b/src/gateway/gateway-codex-harness.live-helpers.ts
@@ -34,6 +34,8 @@ export const EXPECTED_CODEX_MODELS_COMMAND_TEXT = [
  "Available model overrides:",
  "Available model overrides exposed in this session",
  "Available model overrides here:",
+  "Available model overrides listed in this session:",
+  "Available model overrides shown in this session:",
  "Available model overrides in this session:",
  "Available agent models:",
  "Visible options in this session:",
@@ -132,6 +134,8 @@ export function isExpectedCodexModelsCommandText(text: string): boolean {
  const mentionsVisibleOptions =
    normalized.includes("visible options in this session:") ||
    normalized.includes("visible options:") ||
+    normalized.includes("available model overrides listed in this session:") ||
+    normalized.includes("available model overrides shown in this session:") ||
    normalized.includes("available here:") ||
    normalized.includes("available agent ids in this session:");
  const mentionsCurrentActiveModel =
--- a/src/gateway/gateway-codex-harness.live.test.ts
+++ b/src/gateway/gateway-codex-harness.live.test.ts
@@ -3,10 +3,13 @@ import fs from "node:fs/promises";
 import { createServer } from "node:net";
 import os from "node:os";
 import path from "node:path";
+import { setTimeout as delay } from "node:timers/promises";
 import { describe, expect, it } from "vitest";
 import { isLiveTestEnabled } from "../agents/live-test-helpers.js";
 import type { OpenClawConfig } from "../config/config.js";
+import type { ContextEngine } from "../context-engine/types.js";
 import { isTruthyEnvValue } from "../infra/env.js";
+import type { CallGatewayOptions } from "./call.js";
 import type { GatewayClient } from "./client.js";
 import {
  connectTestGatewayClient,
@@ -34,9 +37,18 @@ const CODEX_HARNESS_IMAGE_PROBE = isTruthyEnvValue(
  process.env.OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE,
 );
 const CODEX_HARNESS_MCP_PROBE = isTruthyEnvValue(process.env.OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE);
+const CODEX_HARNESS_SUBAGENT_PROBE = isTruthyEnvValue(
+  process.env.OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_PROBE,
+);
 const CODEX_HARNESS_GUARDIAN_PROBE = isTruthyEnvValue(
  process.env.OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE,
 );
+const CODEX_HARNESS_SUBAGENT_ONLY =
+  CODEX_HARNESS_SUBAGENT_PROBE &&
+  !CODEX_HARNESS_IMAGE_PROBE &&
+  !CODEX_HARNESS_MCP_PROBE &&
+  !CODEX_HARNESS_GUARDIAN_PROBE &&
+  process.env.OPENCLAW_LIVE_CODEX_HARNESS_SUBAGENT_ONLY !== "0";
 const CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS = isTruthyEnvValue(
  process.env.OPENCLAW_LIVE_CODEX_HARNESS_REQUIRE_GUARDIAN_EVENTS,
 );
@@ -79,6 +91,10 @@ function isCodexAccountTokenError(error: unknown): boolean {
  return error instanceof Error && error.message.includes("Failed to extract accountId from token");
 }

+function asRecord(value: unknown): Record<string, unknown> | undefined {
+  return typeof value === "object" && value ? (value as Record<string, unknown>) : undefined;
+}
+
 async function subscribeCodexLiveDebugEvents(sessionKey: string): Promise<() => void> {
  if (!CODEX_HARNESS_DEBUG) {
    return () => undefined;
@@ -498,6 +514,172 @@ async function verifyCodexCronMcpProbe(params: {
  }
 }

+/** Looks up the child session's row among the sessions spawned by the parent session. */
+async function readSpawnedChildRow(params: {
+  childSessionKey: string;
+  client: GatewayClient;
+  parentSessionKey: string;
+}): Promise<Record<string, unknown> | undefined> {
+  const { childSessionKey, client, parentSessionKey } = params;
+  const listQuery = { spawnedBy: parentSessionKey, includeLastMessage: true, limit: 20 };
+  const response = await client.request("sessions.list", listQuery, { timeoutMs: 10_000 });
+  const rows = asRecord(response)?.sessions;
+  if (!Array.isArray(rows)) {
+    // No session array in the response: treat the child as not listed.
+    return undefined;
+  }
+  for (const candidate of rows) {
+    const row = asRecord(candidate);
+    if (row?.key === childSessionKey) {
+      return row;
+    }
+  }
+  return undefined;
+}
+
+/**
+ * Polls until the child session is listed under its parent and a Codex app-server lifecycle
+ * event has been captured for it; throws with diagnostics once the polling window elapses.
+ */
+async function waitForCodexSubagentStarted(params: {
+  childSessionKey: string;
+  client: GatewayClient;
+  events: CapturedAgentEvent[];
+  parentSessionKey: string;
+}): Promise<Record<string, unknown> | undefined> {
+  const { childSessionKey, client, events, parentSessionKey } = params;
+  const pollUntil = Date.now() + Math.min(CODEX_HARNESS_REQUEST_TIMEOUT_MS, 30_000);
+  const sawLifecycleEvent = (): boolean =>
+    events.some(
+      (captured) =>
+        captured.sessionKey === childSessionKey &&
+        captured.stream === "codex_app_server.lifecycle",
+    );
+  let latestRow: Record<string, unknown> | undefined;
+  let latestError: unknown;
+  while (Date.now() < pollUntil) {
+    try {
+      latestRow = await readSpawnedChildRow({ childSessionKey, client, parentSessionKey });
+      if (latestRow && sawLifecycleEvent()) {
+        return latestRow;
+      }
+    } catch (error) {
+      // Keep polling after a failed lookup; only the most recent failure is reported below.
+      latestError = error;
+    }
+    await delay(2_000);
+  }
+  const failureReport = [
+    `subagent ${childSessionKey} did not start through the Codex app-server harness`,
+    `lastRow=${JSON.stringify(latestRow)}`,
+    `events=${JSON.stringify(events)}`,
+    `lastError=${latestError instanceof Error ? latestError.message : String(latestError)}`,
+  ];
+  throw new Error(failureReport.join("\n"));
+}
+
+/**
+ * Spawns a subagent via `spawnSubagentDirect`, routing its gateway calls through the live test
+ * client, and asserts that the child session starts through the Codex app-server harness.
+ */
+async function verifyCodexSubagentProbe(params: {
+  client: GatewayClient;
+  sessionKey: string;
+}): Promise<void> {
+  const runId = randomUUID();
+  const expectedToken = `CODEX-SUBAGENT-${runId.slice(0, 6).toUpperCase()}`;
+  const events: CapturedAgentEvent[] = [];
+  const { onAgentEvent } = await import("../infra/agent-events.js");
+  const unsubscribe = onAgentEvent((event) => {
+    if (!event.stream.startsWith("codex_app_server.")) {
+      return;
+    }
+    events.push({ stream: event.stream, sessionKey: event.sessionKey, data: event.data });
+  });
+  try {
+    const { __testing: subagentSpawnTesting, spawnSubagentDirect } =
+      await import("../agents/subagent-spawn.js");
+    const noOpContextEngine: ContextEngine = {
+      info: { id: "codex-harness-subagent-smoke", name: "Codex harness subagent smoke" },
+      ingest: async () => ({ ingested: false }),
+      assemble: async () => ({ messages: [], estimatedTokens: 0 }),
+      compact: async () => ({ ok: true, compacted: false }),
+    };
+    const gatewayTrace: Array<{
+      durationMs: number;
+      error?: string;
+      method: string;
+      status: "error" | "ok";
+      timeoutMs?: number;
+    }> = [];
+    subagentSpawnTesting.setDepsForTest({
+      resolveContextEngine: async () => noOpContextEngine,
+      callGateway: async <T = Record<string, unknown>>(opts: CallGatewayOptions): Promise<T> => {
+        const startedAt = Date.now();
+        try {
+          // For "agent" calls, do not wait for a final response (presumably: return on accept).
+          const result = await params.client.request(opts.method, opts.params, {
+            expectFinal: opts.method === "agent" ? false : opts.expectFinal,
+            timeoutMs: opts.timeoutMs,
+          });
+          gatewayTrace.push({
+            durationMs: Date.now() - startedAt,
+            method: opts.method,
+            status: "ok",
+            timeoutMs: opts.timeoutMs,
+          });
+          return result as T;
+        } catch (err) {
+          gatewayTrace.push({
+            durationMs: Date.now() - startedAt,
+            error: err instanceof Error ? err.message : String(err),
+            method: opts.method,
+            status: "error",
+            timeoutMs: opts.timeoutMs,
+          });
+          throw err;
+        }
+      },
+    });
+    const spawnResult = await spawnSubagentDirect(
+      {
+        task: `Reply exactly ${expectedToken} and nothing else.`,
+        agentId: "dev",
+        thinking: "low",
+        mode: "run",
+        cleanup: "keep",
+        context: "isolated",
+        expectsCompletionMessage: false,
+        runTimeoutSeconds: CODEX_HARNESS_AGENT_TIMEOUT_SECONDS,
+      },
+      { agentSessionKey: params.sessionKey },
+    );
+    if (spawnResult.status !== "accepted") {
+      throw new Error(
+        `Codex subagent spawn failed: ${JSON.stringify(spawnResult)} trace=${JSON.stringify(gatewayTrace)}`,
+      );
+    }
+    const childSessionKey = spawnResult.childSessionKey;
+    if (!childSessionKey?.includes(":subagent:")) {
+      throw new Error(
+        `subagent spawn did not return a child session key: ${JSON.stringify(spawnResult)}`,
+      );
+    }
+    const childRow = await waitForCodexSubagentStarted({
+      childSessionKey,
+      client: params.client,
+      events,
+      parentSessionKey: params.sessionKey,
+    });
+    expect(childRow?.key).toBe(childSessionKey);
+  } finally {
+    // Unsubscribe first so a failing re-import or deps reset cannot leak the event listener.
+    unsubscribe();
+    const { __testing: subagentSpawnTesting } = await import("../agents/subagent-spawn.js");
+    subagentSpawnTesting.setDepsForTest();
+  }
+}
+
 describeLive("gateway live (Codex harness)", () => {
  it(
    "runs gateway agent turns through the plugin-owned Codex app-server harness",
@@ -569,6 +751,16 @@ describeLive("gateway live (Codex harness)", () => {
      try {
        try {
          const sessionKey = "agent:dev:live-codex-harness";
+
+          if (CODEX_HARNESS_SUBAGENT_PROBE) {
+            logCodexLiveStep("subagent-probe:start", { sessionKey });
+            await verifyCodexSubagentProbe({ client, sessionKey });
+            logCodexLiveStep("subagent-probe:done");
+            if (CODEX_HARNESS_SUBAGENT_ONLY) {
+              return;
+            }
+          }
+
          const unsubscribeDebugEvents = await subscribeCodexLiveDebugEvents(sessionKey);
          const firstNonce = randomBytes(3).toString("hex").toUpperCase();
          try {
@@ -609,6 +801,7 @@ describeLive("gateway live (Codex harness)", () => {
              "model `codex/",
              "session `agent:dev:live-codex-harness`",
              "Model/status card shown above",
+              "Status shown above.",
            ],
          });
          logCodexLiveStep("codex-status-command", { statusText });