agents: add strict-agentic execution contract

This commit is contained in:
Peter Steinberger
2026-04-10 21:57:11 +01:00
parent 3de0267908
commit 09b1117271
22 changed files with 584 additions and 53 deletions

View File

@@ -1,4 +1,4 @@
25d5d9f0f9cb9700024b5f054b19811b5ff764dfa090b8b82755eb6894afb5b0 config-baseline.json
e48b660fcae67c1e50a406e6bcc05584860e7693df3738adb664f9e2bc093e32 config-baseline.core.json
1977d4698bb80b9aa99315f1114a61b5692bd5630f2ac4a225d81ddc5459d588 config-baseline.json
d1ee5c4d01deac5cf8ea284cafcd8b6c952b2554d40947d2463d08e314acfcda config-baseline.core.json
e1f94346a8507ce3dec763b598e79f3bb89ff2e33189ce977cc87d3b05e71c1d config-baseline.channel.json
9153501720ea74f9356432a011fa9b41c9b700084bfe0d156feb5647624b35ad config-baseline.plugin.json
0fb10e5cb00e7da2cd07c959e0e3397ecb2fdcf15e13a7eae06a2c5b2346bb10 config-baseline.plugin.json

View File

@@ -1,2 +1,2 @@
cceabd98fbc368e04aba61e1d3712fe0f86749dc3872e4b9ba057784e29a8559 plugin-sdk-api-baseline.json
de31f5f77bda7163bed395b9669861cb64c514cf1666324e406b7882d84dee7c plugin-sdk-api-baseline.jsonl
2256ba1237c3608ca981bce3a7c66b6880b12d05025f260d5c086b69038f408b plugin-sdk-api-baseline.json
6360529513280140c122020466f0821a9acc83aba64612cf90656c2af0261ab3 plugin-sdk-api-baseline.jsonl

View File

@@ -2333,7 +2333,7 @@ Notes:
### `tools.experimental`
Experimental built-in tool flags. Default off unless a runtime-specific auto-enable rule applies.
Experimental built-in tool flags. Off by default unless the strict-agentic GPT-5 auto-enable rule (see `planTool` below) applies.
```json5
{
@@ -2348,7 +2348,7 @@ Experimental built-in tool flags. Default off unless a runtime-specific auto-ena
Notes:
- `planTool`: enables the structured `update_plan` tool for non-trivial multi-step work tracking.
- Default: `false` for non-OpenAI providers. OpenAI and OpenAI Codex runs auto-enable it when unset; set `false` to disable that auto-enable.
- Default: `false` unless `agents.defaults.embeddedPi.executionContract` (or a per-agent override) is set to `"strict-agentic"` for an OpenAI or OpenAI Codex GPT-5-family run. Set `true` to force the tool on outside that scope, or `false` to keep it off even for strict-agentic GPT-5 runs.
- When enabled, the system prompt also adds usage guidance so the model only uses it for substantial work and keeps at most one step `in_progress`.
### `agents.defaults.subagents`

View File

@@ -56,6 +56,7 @@ type ResolvedAgentConfig = {
identity?: AgentEntry["identity"];
groupChat?: AgentEntry["groupChat"];
subagents?: AgentEntry["subagents"];
embeddedPi?: AgentEntry["embeddedPi"];
sandbox?: AgentEntry["sandbox"];
tools?: AgentEntry["tools"];
};
@@ -163,11 +164,47 @@ export function resolveAgentConfig(
identity: entry.identity,
groupChat: entry.groupChat,
subagents: typeof entry.subagents === "object" && entry.subagents ? entry.subagents : undefined,
embeddedPi:
typeof entry.embeddedPi === "object" && entry.embeddedPi ? entry.embeddedPi : undefined,
sandbox: entry.sandbox,
tools: entry.tools,
};
}
/**
 * Resolves the embedded Pi execution contract that applies to an agent.
 *
 * A per-agent `embeddedPi.executionContract` takes precedence over
 * `agents.defaults.embeddedPi.executionContract`. The per-agent lookup is only
 * attempted when both a config and a non-empty agent id are supplied.
 *
 * @param cfg - Loaded config, or `undefined` when none is available.
 * @param agentId - Agent whose override should be consulted, if any.
 * @returns The effective contract, or `undefined` when neither level sets one.
 */
export function resolveAgentExecutionContract(
  cfg: OpenClawConfig | undefined,
  agentId?: string | null,
): NonNullable<NonNullable<AgentDefaultsConfig["embeddedPi"]>["executionContract"]> | undefined {
  const fallbackContract = cfg?.agents?.defaults?.embeddedPi?.executionContract;
  const perAgentContract =
    cfg && agentId ? resolveAgentConfig(cfg, agentId)?.embeddedPi?.executionContract : undefined;
  return perAgentContract ?? fallbackContract;
}
/**
 * Reports whether the strict-agentic execution contract applies to a run.
 *
 * All of the following must hold:
 * - the contract resolved for the session agent (derived from `sessionKey`,
 *   `config` and `agentId` via `resolveSessionAgentIds`) is `"strict-agentic"`;
 * - the provider, after `normalizeLowercaseStringOrEmpty`, is `openai` or
 *   `openai-codex`;
 * - the trimmed model id is `gpt-5` or begins with `gpt-5.` / `gpt-5-`
 *   (case-insensitive).
 */
export function isStrictAgenticExecutionContractActive(params: {
  config?: OpenClawConfig;
  sessionKey?: string;
  agentId?: string | null;
  provider?: string | null;
  modelId?: string | null;
}): boolean {
  const { config, sessionKey, agentId, provider, modelId } = params;

  const resolvedIds = resolveSessionAgentIds({
    sessionKey,
    config,
    agentId: agentId ?? undefined,
  });
  const contract = resolveAgentExecutionContract(config, resolvedIds.sessionAgentId);
  if (contract !== "strict-agentic") {
    return false;
  }

  const normalizedProvider = normalizeLowercaseStringOrEmpty(provider ?? "");
  const isOpenAiFamily = normalizedProvider === "openai" || normalizedProvider === "openai-codex";
  // Short-circuit keeps the model check from running for non-OpenAI providers.
  return isOpenAiFamily && /^gpt-5(?:[.-]|$)/i.test(modelId?.trim() ?? "");
}
export function resolveAgentSkillsFilter(
cfg: OpenClawConfig,
agentId: string,

View File

@@ -1,5 +1,5 @@
import type { OpenClawConfig } from "../config/config.js";
import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
import { isStrictAgenticExecutionContractActive } from "./agent-scope.js";
import type { AnyAgentTool } from "./tools/common.js";
export function collectPresentOpenClawTools(
@@ -8,18 +8,22 @@ export function collectPresentOpenClawTools(
return candidates.filter((tool): tool is AnyAgentTool => tool !== null && tool !== undefined);
}
/** Returns true when the normalized provider id is `openai` or `openai-codex`. */
function isOpenAIProvider(provider?: string): boolean {
  switch (normalizeOptionalLowercaseString(provider)) {
    case "openai":
    case "openai-codex":
      return true;
    default:
      return false;
  }
}
export function isUpdatePlanToolEnabledForOpenClawTools(
config: OpenClawConfig | undefined,
provider?: string,
): boolean {
const configured = config?.tools?.experimental?.planTool;
export function isUpdatePlanToolEnabledForOpenClawTools(params: {
config?: OpenClawConfig;
agentSessionKey?: string;
agentId?: string | null;
modelProvider?: string;
modelId?: string;
}): boolean {
const configured = params.config?.tools?.experimental?.planTool;
if (configured !== undefined) {
return configured;
}
return isOpenAIProvider(provider);
return isStrictAgenticExecutionContractActive({
config: params.config,
sessionKey: params.agentSessionKey,
agentId: params.agentId,
provider: params.modelProvider,
modelId: params.modelId,
});
}

View File

@@ -3,7 +3,7 @@ import { callGateway } from "../gateway/call.js";
import { getActiveRuntimeWebToolsMetadata } from "../secrets/runtime.js";
import { normalizeDeliveryContext } from "../utils/delivery-context.js";
import type { GatewayMessageChannel } from "../utils/message-channel.js";
import { resolveAgentWorkspaceDir, resolveSessionAgentId } from "./agent-scope.js";
import { resolveAgentWorkspaceDir, resolveSessionAgentIds } from "./agent-scope.js";
import { resolveOpenClawPluginToolsForOptions } from "./openclaw-plugin-tools.js";
import { applyNodesToolWorkspaceGuard } from "./openclaw-tools.nodes-workspace-guard.js";
import {
@@ -81,6 +81,8 @@ export function createOpenClawTools(
modelHasVision?: boolean;
/** Active model provider for provider-specific tool gating. */
modelProvider?: string;
/** Active model id for provider/model-specific tool gating. */
modelId?: string;
/** If true, nodes action="invoke" can call media-returning commands directly. */
allowMediaInvokeCommands?: boolean;
/** Explicit agent ID override for cron/hook sessions. */
@@ -111,9 +113,10 @@ export function createOpenClawTools(
} & SpawnedToolContext,
): AnyAgentTool[] {
const resolvedConfig = options?.config ?? openClawToolsDeps.config;
const sessionAgentId = resolveSessionAgentId({
const { sessionAgentId } = resolveSessionAgentIds({
sessionKey: options?.agentSessionKey,
config: resolvedConfig,
agentId: options?.requesterAgentIdOverride,
});
// Fall back to the session agent workspace so plugin loading stays workspace-stable
// even when a caller forgets to thread workspaceDir explicitly.
@@ -244,7 +247,13 @@ export function createOpenClawTools(
agentSessionKey: options?.agentSessionKey,
requesterAgentIdOverride: options?.requesterAgentIdOverride,
}),
...(isUpdatePlanToolEnabledForOpenClawTools(resolvedConfig, options?.modelProvider)
...(isUpdatePlanToolEnabledForOpenClawTools({
config: resolvedConfig,
agentSessionKey: options?.agentSessionKey,
agentId: options?.requesterAgentIdOverride,
modelProvider: options?.modelProvider,
modelId: options?.modelId,
})
? [createUpdatePlanTool()]
: []),
createSessionsListTool({

View File

@@ -5,7 +5,11 @@ import { createUpdatePlanTool } from "./tools/update-plan-tool.js";
describe("openclaw-tools update_plan gating", () => {
it("keeps update_plan disabled by default", () => {
expect(isUpdatePlanToolEnabledForOpenClawTools({} as OpenClawConfig)).toBe(false);
expect(
isUpdatePlanToolEnabledForOpenClawTools({
config: {} as OpenClawConfig,
}),
).toBe(false);
});
it("registers update_plan when explicitly enabled", () => {
@@ -17,27 +21,177 @@ describe("openclaw-tools update_plan gating", () => {
},
} as OpenClawConfig;
expect(isUpdatePlanToolEnabledForOpenClawTools(config)).toBe(true);
expect(
isUpdatePlanToolEnabledForOpenClawTools({
config,
}),
).toBe(true);
expect(createUpdatePlanTool().displaySummary).toBe("Track a short structured work plan.");
});
it("auto-enables update_plan for OpenAI-family providers", () => {
expect(isUpdatePlanToolEnabledForOpenClawTools({} as OpenClawConfig, "openai")).toBe(true);
expect(isUpdatePlanToolEnabledForOpenClawTools({} as OpenClawConfig, "openai-codex")).toBe(
true,
);
expect(isUpdatePlanToolEnabledForOpenClawTools({} as OpenClawConfig, "anthropic")).toBe(false);
it("does not auto-enable update_plan outside strict-agentic mode", () => {
const cfg = {
agents: {
list: [{ id: "main" }],
},
} as OpenClawConfig;
expect(
isUpdatePlanToolEnabledForOpenClawTools({
config: cfg,
agentSessionKey: "agent:main:main",
modelProvider: "openai",
modelId: "gpt-5.4",
}),
).toBe(false);
});
it("lets config disable update_plan auto-enable", () => {
const config = {
it("auto-enables update_plan for strict-agentic GPT-5 agents", () => {
const cfg = {
agents: {
defaults: {
embeddedPi: {
executionContract: "strict-agentic",
},
},
list: [{ id: "main" }],
},
} as OpenClawConfig;
expect(
isUpdatePlanToolEnabledForOpenClawTools({
config: cfg,
agentSessionKey: "agent:main:main",
modelProvider: "openai",
modelId: "gpt-5.4",
}),
).toBe(true);
});
it("does not auto-enable update_plan for unsupported providers or models", () => {
const cfg = {
agents: {
defaults: {
embeddedPi: {
executionContract: "strict-agentic",
},
},
list: [{ id: "main" }],
},
} as OpenClawConfig;
expect(
isUpdatePlanToolEnabledForOpenClawTools({
config: cfg,
agentSessionKey: "agent:main:main",
modelProvider: "anthropic",
modelId: "claude-opus-4-6",
}),
).toBe(false);
expect(
isUpdatePlanToolEnabledForOpenClawTools({
config: cfg,
agentSessionKey: "agent:main:main",
modelProvider: "openai",
modelId: "gpt-4.1",
}),
).toBe(false);
});
it("lets explicit planTool false override strict-agentic auto-enable", () => {
const cfg = {
tools: {
experimental: {
planTool: false,
},
},
agents: {
defaults: {
embeddedPi: {
executionContract: "strict-agentic",
},
},
list: [{ id: "main" }],
},
} as OpenClawConfig;
expect(isUpdatePlanToolEnabledForOpenClawTools(config, "openai")).toBe(false);
expect(
isUpdatePlanToolEnabledForOpenClawTools({
config: cfg,
agentSessionKey: "agent:main:main",
modelProvider: "openai",
modelId: "gpt-5.4",
}),
).toBe(false);
});
it("resolves strict-agentic gating from explicit agentId when no session key is available", () => {
const cfg = {
agents: {
defaults: {
embeddedPi: {
executionContract: "default",
},
},
list: [
{ id: "main" },
{
id: "research",
embeddedPi: {
executionContract: "strict-agentic",
},
},
],
},
} as OpenClawConfig;
expect(
isUpdatePlanToolEnabledForOpenClawTools({
config: cfg,
agentId: "research",
modelProvider: "openai",
modelId: "gpt-5.4",
}),
).toBe(true);
});
it("applies per-agent overrides without leaking the contract to other agents", () => {
const cfg = {
agents: {
defaults: {
embeddedPi: {
executionContract: "strict-agentic",
},
},
list: [
{
id: "main",
embeddedPi: {
executionContract: "default",
},
},
{
id: "research",
},
],
},
} as OpenClawConfig;
expect(
isUpdatePlanToolEnabledForOpenClawTools({
config: cfg,
agentId: "main",
modelProvider: "openai",
modelId: "gpt-5.4",
}),
).toBe(false);
expect(
isUpdatePlanToolEnabledForOpenClawTools({
config: cfg,
agentId: "research",
modelProvider: "openai",
modelId: "gpt-5.4",
}),
).toBe(true);
});
});

View File

@@ -1,4 +1,5 @@
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../../config/config.js";
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
import {
loadRunOverflowCompactionHarness,
@@ -12,7 +13,9 @@ import {
extractPlanningOnlyPlanDetails,
isLikelyExecutionAckPrompt,
resolveAckExecutionFastPathInstruction,
resolvePlanningOnlyRetryLimit,
resolvePlanningOnlyRetryInstruction,
STRICT_AGENTIC_BLOCKED_TEXT,
} from "./run/incomplete-turn.js";
import type { EmbeddedRunAttemptResult } from "./run/types.js";
@@ -55,6 +58,50 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
expect(result.payloads?.[0]?.text).toContain("verify before retrying");
});
it("uses explicit agentId without a session key before surfacing the strict-agentic blocked state", async () => {
mockedClassifyFailoverReason.mockReturnValue(null);
mockedRunEmbeddedAttempt.mockResolvedValue(
makeAttemptResult({
assistantTexts: ["I'll inspect the code, make the change, and run the checks."],
}),
);
const result = await runEmbeddedPiAgent({
...overflowBaseRunParams,
sessionKey: undefined,
agentId: "research",
provider: "openai",
model: "gpt-5.4",
runId: "run-strict-agentic-explicit-agent",
config: {
agents: {
defaults: {
embeddedPi: {
executionContract: "default",
},
},
list: [
{ id: "main" },
{
id: "research",
embeddedPi: {
executionContract: "strict-agentic",
},
},
],
},
} as OpenClawConfig,
});
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(3);
expect(result.payloads).toEqual([
{
text: STRICT_AGENTIC_BLOCKED_TEXT,
isError: true,
},
]);
});
it("detects replay-safe planning-only GPT turns", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
@@ -69,6 +116,50 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
expect(retryInstruction).toContain("Do not restate the plan");
});
it("detects structured bullet-only plans with intent cues as planning-only GPT turns", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
assistantTexts: [
"Plan:\n1. I'll inspect the code\n2. I'll patch the issue\n3. I'll run the tests",
],
}),
});
expect(retryInstruction).toContain("Do not restate the plan");
});
it("does not misclassify ordinary bullet summaries as planning-only", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
assistantTexts: ["1. Parser refactor\n2. Regression coverage\n3. Docs cleanup"],
}),
});
expect(retryInstruction).toBeNull();
});
it("does not treat a bare plan heading as planning-only without an intent cue", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
assistantTexts: ["Plan:\n1. Parser refactor\n2. Regression coverage\n3. Docs cleanup"],
}),
});
expect(retryInstruction).toBeNull();
});
it("does not retry planning-only detection after tool activity", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
@@ -103,6 +194,33 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
expect(retryInstruction).toBeNull();
});
it("treats update_plan as non-progress for planning-only retry detection", () => {
const retryInstruction = resolvePlanningOnlyRetryInstruction({
provider: "openai",
modelId: "gpt-5.4",
aborted: false,
timedOut: false,
attempt: makeAttemptResult({
assistantTexts: ["I'll capture the steps, then take the first tool action."],
toolMetas: [{ toolName: "update_plan", meta: "status=updated" }],
itemLifecycle: {
startedCount: 1,
completedCount: 1,
activeCount: 0,
},
}),
});
expect(retryInstruction).toContain("Act now");
});
it("allows one retry by default and two retries for strict-agentic runs", () => {
expect(resolvePlanningOnlyRetryLimit("default")).toBe(1);
expect(resolvePlanningOnlyRetryLimit("strict-agentic")).toBe(2);
expect(STRICT_AGENTIC_BLOCKED_TEXT).toContain("plan-only turns");
expect(STRICT_AGENTIC_BLOCKED_TEXT).toContain("advanced the task");
});
it("detects short execution approval prompts", () => {
expect(isLikelyExecutionAckPrompt("ok do it")).toBe(true);
expect(isLikelyExecutionAckPrompt("go ahead")).toBe(true);

View File

@@ -14,7 +14,12 @@ import { normalizeOptionalString } from "../../shared/string-coerce.js";
import { sanitizeForLog } from "../../terminal/ansi.js";
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
import { resolveOpenClawAgentDir } from "../agent-paths.js";
import { hasConfiguredModelFallbacks } from "../agent-scope.js";
import {
hasConfiguredModelFallbacks,
isStrictAgenticExecutionContractActive,
resolveAgentExecutionContract,
resolveSessionAgentIds,
} from "../agent-scope.js";
import {
type AuthProfileFailureReason,
markAuthProfileFailure,
@@ -93,7 +98,9 @@ import {
resolveAckExecutionFastPathInstruction,
resolveIncompleteTurnPayloadText,
extractPlanningOnlyPlanDetails,
resolvePlanningOnlyRetryLimit,
resolvePlanningOnlyRetryInstruction,
STRICT_AGENTIC_BLOCKED_TEXT,
} from "./run/incomplete-turn.js";
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
@@ -386,6 +393,22 @@ export async function runEmbeddedPiAgent(
});
await initializeAuthProfile();
const { sessionAgentId } = resolveSessionAgentIds({
sessionKey: params.sessionKey,
config: params.config,
agentId: params.agentId,
});
const configuredExecutionContract =
resolveAgentExecutionContract(params.config, sessionAgentId) ?? "default";
const strictAgenticActive = isStrictAgenticExecutionContractActive({
config: params.config,
sessionKey: params.sessionKey,
agentId: params.agentId,
provider,
modelId,
});
const executionContract = strictAgenticActive ? "strict-agentic" : "default";
const maxPlanningOnlyRetryAttempts = resolvePlanningOnlyRetryLimit(executionContract);
const MAX_TIMEOUT_COMPACTION_ATTEMPTS = 2;
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
@@ -1501,7 +1524,7 @@ export async function runEmbeddedPiAgent(
if (
!incompleteTurnText &&
nextPlanningOnlyRetryInstruction &&
planningOnlyRetryAttempts < 1
planningOnlyRetryAttempts < maxPlanningOnlyRetryAttempts
) {
const planningOnlyText = attempt.assistantTexts.join("\n\n").trim();
const planDetails = extractPlanningOnlyPlanDetails(planningOnlyText);
@@ -1532,10 +1555,38 @@ export async function runEmbeddedPiAgent(
planningOnlyRetryInstruction = nextPlanningOnlyRetryInstruction;
log.warn(
`planning-only turn detected: runId=${params.runId} sessionId=${params.sessionId} ` +
`provider=${provider}/${modelId} — retrying once with act-now steer`,
`provider=${provider}/${modelId} contract=${executionContract} configured=${configuredExecutionContract} — retrying ` +
`${planningOnlyRetryAttempts}/${maxPlanningOnlyRetryAttempts} with act-now steer`,
);
continue;
}
if (!incompleteTurnText && nextPlanningOnlyRetryInstruction && strictAgenticActive) {
log.warn(
`strict-agentic run exhausted planning-only retries: runId=${params.runId} sessionId=${params.sessionId} ` +
`provider=${provider}/${modelId} configured=${configuredExecutionContract} — surfacing blocked state`,
);
return {
payloads: [
{
text: STRICT_AGENTIC_BLOCKED_TEXT,
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta,
aborted,
systemPromptReport: attempt.systemPromptReport,
finalAssistantVisibleText,
},
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
didSendDeterministicApprovalPrompt: attempt.didSendDeterministicApprovalPrompt,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
messagingToolSentTargets: attempt.messagingToolSentTargets,
successfulCronAdds: attempt.successfulCronAdds,
};
}
if (incompleteTurnText) {
const incompleteStopReason = attempt.lastAssistant?.stopReason;
log.warn(

View File

@@ -1,3 +1,4 @@
import type { EmbeddedPiExecutionContract } from "../../../config/types.agent-defaults.js";
import { normalizeLowercaseStringOrEmpty } from "../../../shared/string-coerce.js";
import { isLikelyMutatingToolName } from "../../tool-mutation.js";
import type { EmbeddedRunAttemptResult } from "./types.js";
@@ -35,6 +36,8 @@ const PLANNING_ONLY_PROMISE_RE =
/\b(?:i(?:'ll| will)|let me|going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|i can do that)\b/i;
const PLANNING_ONLY_COMPLETION_RE =
/\b(?:done|finished|implemented|updated|fixed|changed|ran|verified|found|here(?:'s| is) what|blocked by|the blocker is)\b/i;
const PLANNING_ONLY_HEADING_RE = /^(?:plan|steps?|next steps?)\s*:/i;
const PLANNING_ONLY_BULLET_RE = /^(?:[-*]\s+|\d+[.)]\s+)/u;
const ACK_EXECUTION_NORMALIZED_SET = new Set([
"ok",
"okay",
@@ -81,6 +84,8 @@ export const PLANNING_ONLY_RETRY_INSTRUCTION =
"The previous assistant turn only described the plan. Do not restate the plan. Act now: take the first concrete tool action you can. If a real blocker prevents action, reply with the exact blocker in one sentence.";
export const ACK_EXECUTION_FAST_PATH_INSTRUCTION =
"The latest user message is a short approval to proceed. Do not recap or restate the plan. Start with the first concrete tool action immediately. Keep any user-facing follow-up brief and natural.";
export const STRICT_AGENTIC_BLOCKED_TEXT =
"⚠️ Agent stopped after repeated plan-only turns without taking a concrete action. No concrete tool action or external side effect advanced the task.";
export type PlanningOnlyPlanDetails = {
explanation: string;
@@ -131,7 +136,8 @@ function shouldApplyPlanningOnlyRetryGuard(params: {
provider?: string;
modelId?: string;
}): boolean {
if (params.provider !== "openai" && params.provider !== "openai-codex") {
const provider = normalizeLowercaseStringOrEmpty(params.provider);
if (provider !== "openai" && provider !== "openai-codex") {
return false;
}
return /^gpt-5(?:[.-]|$)/i.test(params.modelId ?? "");
@@ -190,6 +196,20 @@ function extractPlanningOnlySteps(text: string): string[] {
.slice(0, 4);
}
/**
 * Detects replies laid out as a plan rather than as prose.
 *
 * Blank lines are dropped and the remaining lines trimmed. The text qualifies
 * when at least one line matches `PLANNING_ONLY_PROMISE_RE` AND either the
 * first line matches `PLANNING_ONLY_HEADING_RE` or at least two lines match
 * `PLANNING_ONLY_BULLET_RE`.
 */
function hasStructuredPlanningOnlyFormat(text: string): boolean {
  const contentLines: string[] = [];
  for (const rawLine of text.split(/\r?\n/)) {
    const candidate = rawLine.trim();
    if (candidate) {
      contentLines.push(candidate);
    }
  }
  const [firstLine] = contentLines;
  if (firstLine === undefined) {
    return false;
  }

  // Both accepted shapes require an intent cue, so check that once up front.
  if (!contentLines.some((line) => PLANNING_ONLY_PROMISE_RE.test(line))) {
    return false;
  }
  if (PLANNING_ONLY_HEADING_RE.test(firstLine)) {
    return true;
  }
  const bulletLines = contentLines.reduce(
    (count, line) => (PLANNING_ONLY_BULLET_RE.test(line) ? count + 1 : count),
    0,
  );
  return bulletLines >= 2;
}
export function extractPlanningOnlyPlanDetails(text: string): PlanningOnlyPlanDetails | null {
const trimmed = text.trim();
if (!trimmed) {
@@ -202,6 +222,20 @@ export function extractPlanningOnlyPlanDetails(text: string): PlanningOnlyPlanDe
};
}
/** Counts the tool meta entries whose `toolName` is exactly `update_plan`. */
function countPlanOnlyToolMetas(toolMetas: PlanningOnlyAttempt["toolMetas"]): number {
  return toolMetas.reduce(
    (planOnlyCount, entry) => (entry.toolName === "update_plan" ? planOnlyCount + 1 : planOnlyCount),
    0,
  );
}
/** Returns true when at least one tool meta entry is for a tool other than `update_plan`. */
function hasNonPlanToolActivity(toolMetas: PlanningOnlyAttempt["toolMetas"]): boolean {
  const onlyPlanUpdates = toolMetas.every((entry) => entry.toolName === "update_plan");
  return !onlyPlanUpdates;
}
/**
 * Maximum number of planning-only retries for an execution contract.
 *
 * @param executionContract - Effective contract for the run; may be omitted.
 * @returns `2` for `"strict-agentic"`, otherwise `1`.
 */
export function resolvePlanningOnlyRetryLimit(
  executionContract?: EmbeddedPiExecutionContract,
): number {
  const strictAgenticLimit = 2;
  const standardLimit = 1;
  if (executionContract === "strict-agentic") {
    return strictAgenticLimit;
  }
  return standardLimit;
}
export function resolvePlanningOnlyRetryInstruction(params: {
provider?: string;
modelId?: string;
@@ -209,6 +243,7 @@ export function resolvePlanningOnlyRetryInstruction(params: {
timedOut: boolean;
attempt: PlanningOnlyAttempt;
}): string | null {
const planOnlyToolMetaCount = countPlanOnlyToolMetas(params.attempt.toolMetas);
if (
!shouldApplyPlanningOnlyRetryGuard({
provider: params.provider,
@@ -221,7 +256,8 @@ export function resolvePlanningOnlyRetryInstruction(params: {
params.attempt.didSendDeterministicApprovalPrompt ||
params.attempt.didSendViaMessagingTool ||
params.attempt.lastToolError ||
params.attempt.itemLifecycle.startedCount > 0 ||
hasNonPlanToolActivity(params.attempt.toolMetas) ||
params.attempt.itemLifecycle.startedCount > planOnlyToolMetaCount ||
params.attempt.replayMetadata.hadPotentialSideEffects
) {
return null;
@@ -236,7 +272,7 @@ export function resolvePlanningOnlyRetryInstruction(params: {
if (!text || text.length > 700 || text.includes("```")) {
return null;
}
if (!PLANNING_ONLY_PROMISE_RE.test(text)) {
if (!PLANNING_ONLY_PROMISE_RE.test(text) && !hasStructuredPlanningOnlyFormat(text)) {
return null;
}
if (PLANNING_ONLY_COMPLETION_RE.test(text)) {

View File

@@ -556,6 +556,7 @@ export function createOpenClawCodingTools(options?: {
currentThreadTs: options?.currentThreadTs,
currentMessageId: options?.currentMessageId,
modelProvider: options?.modelProvider,
modelId: options?.modelId,
replyToMode: options?.replyToMode,
hasRepliedRef: options?.hasRepliedRef,
modelHasVision: options?.modelHasVision,

View File

@@ -13,9 +13,15 @@ describe("update_plan tool", () => {
],
});
expect(result.content).toEqual([{ type: "text", text: "Plan updated." }]);
expect(result.content).toEqual([]);
expect(result.details).toEqual({
status: "updated",
explanation: "Started work",
plan: [
{ step: "Inspect harness", status: "completed" },
{ step: "Add tool", status: "in_progress" },
{ step: "Run tests", status: "pending" },
],
});
});
@@ -31,4 +37,23 @@ describe("update_plan tool", () => {
}),
).rejects.toThrow("plan can contain at most one in_progress step");
});
it("ignores extra per-step fields instead of rejecting the plan", async () => {
const tool = createUpdatePlanTool();
const result = await tool.execute("call-1", {
plan: [
{ step: "Inspect harness", status: "completed", owner: "agent-1" },
{ step: "Run tests", status: "pending", notes: ["later"] },
],
});
expect(result.content).toEqual([]);
expect(result.details).toEqual({
status: "updated",
plan: [
{ step: "Inspect harness", status: "completed" },
{ step: "Run tests", status: "pending" },
],
});
});
});

View File

@@ -4,7 +4,7 @@ import {
describeUpdatePlanTool,
UPDATE_PLAN_TOOL_DISPLAY_SUMMARY,
} from "../tool-description-presets.js";
import { type AnyAgentTool, ToolInputError, textResult, readStringParam } from "./common.js";
import { type AnyAgentTool, ToolInputError, readStringParam } from "./common.js";
const PLAN_STEP_STATUSES = ["pending", "in_progress", "completed"] as const;
@@ -22,7 +22,7 @@ const UpdatePlanToolSchema = Type.Object({
description: 'One of "pending", "in_progress", or "completed".',
}),
},
{ additionalProperties: false },
{ additionalProperties: true },
),
{
minItems: 1,
@@ -82,11 +82,16 @@ export function createUpdatePlanTool(): AnyAgentTool {
parameters: UpdatePlanToolSchema,
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
readStringParam(params, "explanation");
readPlanSteps(params);
return textResult("Plan updated.", {
status: "updated" as const,
});
const explanation = readStringParam(params, "explanation");
const plan = readPlanSteps(params);
return {
content: [],
details: {
status: "updated" as const,
...(explanation ? { explanation } : {}),
plan,
},
};
},
};
}

View File

@@ -4485,6 +4485,21 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
'How embedded Pi handles workspace-local `.pi/config/settings.json`: "sanitize" (default) strips shellPath/shellCommandPrefix, "ignore" disables project settings entirely, and "trusted" applies project settings as-is.',
},
executionContract: {
anyOf: [
{
type: "string",
const: "default",
},
{
type: "string",
const: "strict-agentic",
},
],
title: "Embedded Pi Execution Contract",
description:
'Embedded Pi execution contract: "default" keeps the standard runner behavior, while "strict-agentic" keeps OpenAI/OpenAI Codex GPT-5-family runs acting until they hit a real blocker instead of stopping at plans or filler.',
},
},
additionalProperties: false,
title: "Embedded Pi",
@@ -6121,6 +6136,30 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
},
additionalProperties: false,
},
embeddedPi: {
type: "object",
properties: {
executionContract: {
anyOf: [
{
type: "string",
const: "default",
},
{
type: "string",
const: "strict-agentic",
},
],
title: "Agent Embedded Pi Execution Contract",
description:
'Optional per-agent embedded Pi execution contract override. Set "strict-agentic" to keep that agent acting through plan-only turns on OpenAI/OpenAI Codex GPT-5-family runs, or "default" to inherit the standard runner behavior.',
},
},
additionalProperties: false,
title: "Agent Embedded Pi",
description:
"Optional per-agent embedded Pi overrides. Use this to opt specific agents into stricter GPT-5 execution behavior without changing the global default.",
},
sandbox: {
type: "object",
properties: {
@@ -17251,7 +17290,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
type: "boolean",
title: "Enable Structured Plan Tool",
description:
"Enable or disable the experimental structured `update_plan` tool for non-trivial multi-step work tracking. OpenAI and OpenAI Codex runs auto-enable it when this flag is unset; set false to disable that auto-enable.",
"Enable the experimental structured `update_plan` tool for non-trivial multi-step work tracking. Leave this off unless you explicitly want the tool outside strict-agentic embedded Pi runs.",
},
},
additionalProperties: false,
@@ -23657,7 +23696,7 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
},
"tools.experimental.planTool": {
label: "Enable Structured Plan Tool",
help: "Enable or disable the experimental structured `update_plan` tool for non-trivial multi-step work tracking. OpenAI and OpenAI Codex runs auto-enable it when this flag is unset; set false to disable that auto-enable.",
help: "Enable the experimental structured `update_plan` tool for non-trivial multi-step work tracking. Leave this off unless you explicitly want the tool outside strict-agentic embedded Pi runs.",
tags: ["security", "tools", "advanced"],
},
"tools.elevated": {
@@ -25341,6 +25380,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: 'How embedded Pi handles workspace-local `.pi/config/settings.json`: "sanitize" (default) strips shellPath/shellCommandPrefix, "ignore" disables project settings entirely, and "trusted" applies project settings as-is.',
tags: ["access"],
},
"agents.defaults.embeddedPi.executionContract": {
label: "Embedded Pi Execution Contract",
help: 'Embedded Pi execution contract: "default" keeps the standard runner behavior, while "strict-agentic" keeps OpenAI/OpenAI Codex GPT-5-family runs acting until they hit a real blocker instead of stopping at plans or filler.',
tags: ["advanced"],
},
"agents.defaults.heartbeat.includeSystemPromptSection": {
label: "Heartbeat Include System Prompt Section",
help: "Includes the default agent's ## Heartbeats system prompt section when true. Turn this off to keep heartbeat runtime behavior while omitting the heartbeat prompt instructions from the agent system prompt.",
@@ -25351,6 +25395,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Per-agent override for whether the default agent's ## Heartbeats system prompt section is injected. Use false to keep heartbeat runtime behavior but omit the heartbeat prompt instructions from that agent's system prompt.",
tags: ["automation"],
},
"agents.list[].embeddedPi": {
label: "Agent Embedded Pi",
help: "Optional per-agent embedded Pi overrides. Use this to opt specific agents into stricter GPT-5 execution behavior without changing the global default.",
tags: ["advanced"],
},
"agents.list[].embeddedPi.executionContract": {
label: "Agent Embedded Pi Execution Contract",
help: 'Optional per-agent embedded Pi execution contract override. Set "strict-agentic" to keep that agent acting through plan-only turns on OpenAI/OpenAI Codex GPT-5-family runs, or "default" to inherit the standard runner behavior.',
tags: ["advanced"],
},
"agents.defaults.heartbeat.directPolicy": {
label: "Heartbeat Direct Policy",
help: 'Controls whether heartbeat delivery may target direct/DM chats: "allow" (default) permits DM delivery and "block" suppresses direct-target sends.',

View File

@@ -315,7 +315,7 @@ export const FIELD_HELP: Record<string, string> = {
"tools.experimental":
"Experimental built-in tool flags. Keep these off by default and enable only when you are intentionally testing a preview surface.",
"tools.experimental.planTool":
"Enable or disable the experimental structured `update_plan` tool for non-trivial multi-step work tracking. OpenAI and OpenAI Codex runs auto-enable it when this flag is unset; set false to disable that auto-enable.",
"Enable the experimental structured `update_plan` tool for non-trivial multi-step work tracking. Leave this off unless you explicitly want the tool outside strict-agentic embedded Pi runs.",
"tools.elevated":
"Elevated tool access controls for privileged command surfaces that should only be reachable from trusted senders. Keep disabled unless operator workflows explicitly require elevated actions.",
"tools.elevated.enabled":
@@ -1181,6 +1181,12 @@ export const FIELD_HELP: Record<string, string> = {
"Embedded Pi runner hardening controls for how workspace-local Pi settings are trusted and applied in OpenClaw sessions.",
"agents.defaults.embeddedPi.projectSettingsPolicy":
'How embedded Pi handles workspace-local `.pi/config/settings.json`: "sanitize" (default) strips shellPath/shellCommandPrefix, "ignore" disables project settings entirely, and "trusted" applies project settings as-is.',
"agents.defaults.embeddedPi.executionContract":
'Embedded Pi execution contract: "default" keeps the standard runner behavior, while "strict-agentic" keeps OpenAI/OpenAI Codex GPT-5-family runs acting until they hit a real blocker instead of stopping at plans or filler.',
"agents.list[].embeddedPi":
"Optional per-agent embedded Pi overrides. Use this to opt specific agents into stricter GPT-5 execution behavior without changing the global default.",
"agents.list[].embeddedPi.executionContract":
'Optional per-agent embedded Pi execution contract override. Set "strict-agentic" to keep that agent acting through plan-only turns on OpenAI/OpenAI Codex GPT-5-family runs, or "default" to inherit the standard runner behavior.',
"agents.defaults.humanDelay.mode": 'Delay style for block replies ("off", "natural", "custom").',
"agents.defaults.humanDelay.minMs": "Minimum delay in ms for custom humanDelay (default: 800).",
"agents.defaults.humanDelay.maxMs": "Maximum delay in ms for custom humanDelay (default: 2500).",

View File

@@ -549,8 +549,11 @@ export const FIELD_LABELS: Record<string, string> = {
"agents.defaults.compaction.memoryFlush.systemPrompt": "Compaction Memory Flush System Prompt",
"agents.defaults.embeddedPi": "Embedded Pi",
"agents.defaults.embeddedPi.projectSettingsPolicy": "Embedded Pi Project Settings Policy",
"agents.defaults.embeddedPi.executionContract": "Embedded Pi Execution Contract",
"agents.defaults.heartbeat.includeSystemPromptSection": "Heartbeat Include System Prompt Section",
"agents.list.*.heartbeat.includeSystemPromptSection": "Heartbeat Include System Prompt Section",
"agents.list[].embeddedPi": "Agent Embedded Pi",
"agents.list[].embeddedPi.executionContract": "Agent Embedded Pi Execution Contract",
"agents.defaults.heartbeat.directPolicy": "Heartbeat Direct Policy",
"agents.list.*.heartbeat.directPolicy": "Heartbeat Direct Policy",
"agents.defaults.heartbeat.suppressToolErrorWarnings": "Heartbeat Suppress Tool Error Warnings",

View File

@@ -12,6 +12,7 @@ import type {
import type { MemorySearchConfig } from "./types.tools.js";
export type AgentContextInjection = "always" | "continuation-skip";
/**
 * Execution contract for the embedded Pi runner.
 * - "default": keep the standard runner behavior
 * - "strict-agentic": on OpenAI/OpenAI Codex GPT-5-family runs, keep acting until
 *   hitting a real blocker instead of stopping at plans or filler
 */
export type EmbeddedPiExecutionContract = "default" | "strict-agentic";
export type AgentModelEntryConfig = {
alias?: string;
@@ -222,6 +223,12 @@ export type AgentDefaultsConfig = {
* - trusted: trust project settings as-is
*/
projectSettingsPolicy?: "trusted" | "sanitize" | "ignore";
/**
* Embedded Pi execution contract:
* - default: keep the standard runner behavior
* - strict-agentic: on OpenAI/OpenAI Codex GPT-5-family runs, keep acting until hitting a real blocker
*/
executionContract?: EmbeddedPiExecutionContract;
};
/** Vector memory search configuration (per-agent overrides supported). */
memorySearch?: MemorySearchConfig;

View File

@@ -1,5 +1,5 @@
import type { ChatType } from "../channels/chat-type.js";
import type { AgentDefaultsConfig } from "./types.agent-defaults.js";
import type { AgentDefaultsConfig, EmbeddedPiExecutionContract } from "./types.agent-defaults.js";
import type {
AgentEmbeddedHarnessConfig,
AgentModelConfig,
@@ -98,6 +98,11 @@ export type AgentConfig = {
/** Require explicit agentId in sessions_spawn (no default same-as-caller). */
requireAgentId?: boolean;
};
/**
 * Optional per-agent embedded Pi overrides. Lets a specific agent opt into stricter
 * GPT-5 execution behavior without changing the global default.
 */
embeddedPi?: {
/**
 * Optional per-agent execution contract override:
 * - "strict-agentic": keep this agent acting through plan-only turns on
 *   OpenAI/OpenAI Codex GPT-5-family runs
 * - "default": use the standard runner behavior
 */
executionContract?: EmbeddedPiExecutionContract;
};
/** Optional per-agent sandbox overrides. */
sandbox?: AgentSandboxConfig;
/** Optional per-agent stream params (e.g. cacheRetention, temperature). */

View File

@@ -640,9 +640,9 @@ export type ToolsConfig = {
deny?: string[];
};
};
/** Experimental tool flags. Default off unless explicitly enabled or runtime auto-enabled. */
/** Experimental tool flags. Default off unless explicitly enabled, except strict-agentic GPT-5 OpenAI/Codex runs may auto-enable `planTool`. */
experimental?: {
/** Enable or disable the structured `update_plan` tool. OpenAI-family runs auto-enable it unless this is false. */
/**
 * Explicit override for the structured `update_plan` tool.
 * - true: force the tool on, including outside strict-agentic execution mode
 * - false: keep the tool off, even for strict-agentic GPT-5 OpenAI/Codex runs
 * - unset: off, except that strict-agentic GPT-5 OpenAI/Codex runs may auto-enable it
 */
planTool?: boolean;
};
};

View File

@@ -44,4 +44,13 @@ describe("agent defaults schema", () => {
// "never" is not a member of the AgentContextInjection type ("always" | "continuation-skip"),
// so the schema is expected to throw rather than accept it.
it("rejects invalid contextInjection values", () => {
expect(() => AgentDefaultsSchema.parse({ contextInjection: "never" })).toThrow();
});
// The defaults schema must accept the embedded Pi execution contract and return it unchanged.
it("accepts embeddedPi.executionContract", () => {
  const input = { embeddedPi: { executionContract: "strict-agentic" } };
  const parsed = AgentDefaultsSchema.parse(input)!;
  expect(parsed.embeddedPi?.executionContract).toBe("strict-agentic");
});
});

View File

@@ -159,6 +159,7 @@ export const AgentDefaultsSchema = z
projectSettingsPolicy: z
.union([z.literal("trusted"), z.literal("sanitize"), z.literal("ignore")])
.optional(),
// Optional execution contract; the accepted literals mirror the
// EmbeddedPiExecutionContract type ("default" | "strict-agentic").
executionContract: z.union([z.literal("default"), z.literal("strict-agentic")]).optional(),
})
.strict()
.optional(),

View File

@@ -829,6 +829,12 @@ export const AgentEntrySchema = z
})
.strict()
.optional(),
// Optional per-agent embedded Pi overrides. Only `executionContract` is declared here;
// `.strict()` makes any other key under `embeddedPi` a validation error.
embeddedPi: z
.object({
// Accepted literals mirror the EmbeddedPiExecutionContract type ("default" | "strict-agentic").
executionContract: z.union([z.literal("default"), z.literal("strict-agentic")]).optional(),
})
.strict()
.optional(),
sandbox: AgentSandboxSchema,
params: z.record(z.string(), z.unknown()).optional(),
tools: AgentToolsSchema,