diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e0f8dcfbd6..1117891fc06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai - memory-wiki: require write scope for Obsidian search [AI]. (#80904) Thanks @pgondhi987. - Build: skip copied metadata for bundled plugins that are excluded from build entries, preventing update/status rebuilds from advertising missing QQ Bot runtime files. (#80925) - Control UI/sessions: nest subagent sessions under their parent session in the session picker dropdown using a visual `└─ ` prefix, making the parent-child relationship clear. Fixes #77628. (#78623) Thanks @chinar-amrutkar. +- Auto-reply: surface a visible error when the configured model backend fails and fallback produces no visible reply, while preserving intentional silent turns and side-effect-only deliveries. (#80917) Thanks @dutifulbob. ### Changes diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.loop.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.loop.test.ts index 43d7d346d6c..457fbf9d770 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.loop.test.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.loop.test.ts @@ -630,6 +630,20 @@ describe("overflow compaction in run loop", () => { expect(result.messagingToolSentTexts).toEqual(["already delivered"]); }); + it("propagates deterministic approval prompt delivery from attempts", async () => { + mockedRunEmbeddedAttempt.mockResolvedValue( + makeAttemptResult({ + assistantTexts: [], + didSendDeterministicApprovalPrompt: true, + }), + ); + + const result = await runEmbeddedPiAgent(baseParams); + + expect(result.payloads).toBeUndefined(); + expect(result.didSendDeterministicApprovalPrompt).toBe(true); + }); + it("returns a timeout payload instead of a partial assistant fragment after stream timeout", async () => { mockedRunEmbeddedAttempt.mockResolvedValue( makeAttemptResult({ diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 2384c81c9e2..e043d1aed10 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -2692,6 +2692,7 @@ export async function runEmbeddedAttempt( getSuccessfulCronAdds, getReplayState, didSendViaMessagingTool, + didSendDeterministicApprovalPrompt, getLastToolError, setTerminalLifecycleMeta, getUsageTotals, @@ -4086,6 +4087,7 @@ export async function runEmbeddedAttempt( currentAttemptAssistant, lastToolError: getLastToolError?.(), didSendViaMessagingTool: didSendViaMessagingTool(), + didSendDeterministicApprovalPrompt: didSendDeterministicApprovalPrompt(), messagingToolSentTexts: getMessagingToolSentTexts(), messagingToolSentMediaUrls: getMessagingToolSentMediaUrls(), messagingToolSentTargets: getMessagingToolSentTargets(), diff --git a/src/agents/pi-embedded-runner/types.ts b/src/agents/pi-embedded-runner/types.ts index a669ccda869..d1177ed3cd5 100644 --- a/src/agents/pi-embedded-runner/types.ts +++ b/src/agents/pi-embedded-runner/types.ts @@ -177,6 +177,8 @@ export type EmbeddedPiRunResult = { // True if a messaging tool successfully sent a message. // Used to suppress agent's confirmation text. didSendViaMessagingTool?: boolean; + // True if a deterministic approval prompt was sent through the tool-result channel. + didSendDeterministicApprovalPrompt?: boolean; // Texts successfully sent via messaging tools during the run. messagingToolSentTexts?: string[]; // Media URLs successfully sent via messaging tools during the run. diff --git a/src/auto-reply/reply/agent-runner.runreplyagent.e2e.test.ts b/src/auto-reply/reply/agent-runner.runreplyagent.e2e.test.ts index c3ef18ba461..310abb03147 100644 --- a/src/auto-reply/reply/agent-runner.runreplyagent.e2e.test.ts +++ b/src/auto-reply/reply/agent-runner.runreplyagent.e2e.test.ts @@ -931,6 +931,420 @@ describe("runReplyAgent typing (heartbeat)", () => { } }); + it("surfaces a configured backend failure when fallback produces no visible reply", async () => { + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "NO_REPLY" }], + meta: {}, + }); + const fallbackSpy = vi + .spyOn(modelFallbackModule, "runWithModelFallback") + .mockImplementationOnce( + async ({ run }: { run: (provider: string, model: string) => Promise }) => ({ + result: await run("openai-codex", "gpt-5.5"), + provider: "openai-codex", + model: "gpt-5.5", + attempts: [ + { + provider: "lmstudio", + model: "gemma-4-e4b-it", + error: "Connection error.", + reason: "timeout", + }, + ], + }), + ); + + try { + const { run } = createMinimalRun({ + runOverrides: { + provider: "lmstudio", + model: "gemma-4-e4b-it", + }, + sessionCtx: { + Provider: "discord", + OriginatingChannel: "discord", + MessageSid: "1503645939964055592", + }, + }); + const res = await run(); + const payload = Array.isArray(res) ? res[0] : res; + + expect(payload?.isError).toBe(true); + expect(payload?.text).toContain("configured model backend lmstudio/gemma-4-e4b-it"); + expect(payload?.text).toContain("Fallback used openai-codex/gpt-5.5"); + expect(payload?.text).toContain("no visible reply"); + } finally { + fallbackSpy.mockRestore(); + } + }); + + it("surfaces a configured backend failure when fallback returns no payloads", async () => { + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [], + meta: {}, + }); + const fallbackSpy = vi + .spyOn(modelFallbackModule, "runWithModelFallback") + .mockImplementationOnce( + async ({ run }: { run: (provider: string, model: string) => Promise }) => ({ + result: await run("openai-codex", "gpt-5.5"), + provider: "openai-codex", + model: "gpt-5.5", + attempts: [ + { + provider: "lmstudio", + model: "gemma-4-e4b-it", + error: "Connection error.", + reason: "timeout", + }, + ], + }), + ); + + try { + const { run } = createMinimalRun({ + runOverrides: { + provider: "lmstudio", + model: "gemma-4-e4b-it", + }, + sessionCtx: { + Provider: "discord", + OriginatingChannel: "discord", + MessageSid: "1503645939964055592", + }, + }); + const res = await run(); + const payload = Array.isArray(res) ? res[0] : res; + + expect(payload?.isError).toBe(true); + expect(payload?.text).toContain("configured model backend lmstudio/gemma-4-e4b-it"); + expect(payload?.text).toContain("Fallback used openai-codex/gpt-5.5"); + expect(payload?.text).toContain("no visible reply"); + } finally { + fallbackSpy.mockRestore(); + } + }); + + it("surfaces a persisted configured backend failure when the active fallback is silent", async () => { + const sessionEntry: SessionEntry = { + sessionId: "session", + updatedAt: Date.now(), + providerOverride: "openai-codex", + modelOverride: "gpt-5.5", + modelOverrideSource: "auto", + modelOverrideFallbackOriginProvider: "lmstudio", + modelOverrideFallbackOriginModel: "gemma-4-e4b-it", + }; + const sessionStore = { main: sessionEntry }; + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "NO_REPLY" }], + meta: {}, + }); + + const { run } = createMinimalRun({ + runOverrides: { + provider: "openai-codex", + model: "gpt-5.5", + }, + sessionEntry, + sessionStore, + sessionCtx: { + Provider: "discord", + OriginatingChannel: "discord", + MessageSid: "1503677587568722061", + }, + }); + const res = await run(); + const payload = Array.isArray(res) ? res[0] : res; + + expect(payload?.isError).toBe(true); + expect(payload?.text).toContain("configured model backend lmstudio/gemma-4-e4b-it"); + expect(payload?.text).toContain("Fallback used openai-codex/gpt-5.5"); + expect(payload?.text).toContain("no visible reply"); + }); + + it("does not surface fallback silence when fallback already replied through a messaging tool", async () => { + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "already sent" }], + messagingToolSentTexts: ["already sent"], + messagingToolSentTargets: [{ tool: "message", provider: "discord", to: "channel:C1" }], + meta: {}, + }); + const fallbackSpy = vi + .spyOn(modelFallbackModule, "runWithModelFallback") + .mockImplementationOnce( + async ({ run }: { run: (provider: string, model: string) => Promise }) => ({ + result: await run("openai-codex", "gpt-5.5"), + provider: "openai-codex", + model: "gpt-5.5", + attempts: [ + { + provider: "lmstudio", + model: "gemma-4-e4b-it", + error: "Connection error.", + reason: "timeout", + }, + ], + }), + ); + + try { + const { run } = createMinimalRun({ + runOverrides: { + provider: "lmstudio", + model: "gemma-4-e4b-it", + messageProvider: "discord", + }, + sessionCtx: { + Provider: "discord", + OriginatingChannel: "discord", + OriginatingTo: "channel:C1", + AccountId: "primary", + MessageSid: "1503645939964055592", + }, + }); + + await expect(run()).resolves.toBeUndefined(); + } finally { + fallbackSpy.mockRestore(); + } + }); + + it("does not treat whitespace-only messaging evidence as fallback delivery", async () => { + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "NO_REPLY" }], + messagingToolSentTexts: [" "], + messagingToolSentMediaUrls: ["\t"], + messagingToolSentTargets: [ + { tool: "message", provider: "discord", to: "channel:C1", text: " " }, + ], + meta: {}, + }); + const fallbackSpy = vi + .spyOn(modelFallbackModule, "runWithModelFallback") + .mockImplementationOnce( + async ({ run }: { run: (provider: string, model: string) => Promise }) => ({ + result: await run("openai-codex", "gpt-5.5"), + provider: "openai-codex", + model: "gpt-5.5", + attempts: [ + { + provider: "lmstudio", + model: "gemma-4-e4b-it", + error: "Connection error.", + reason: "timeout", + }, + ], + }), + ); + + try { + const { run } = createMinimalRun({ + runOverrides: { + provider: "lmstudio", + model: "gemma-4-e4b-it", + messageProvider: "discord", + }, + sessionCtx: { + Provider: "discord", + OriginatingChannel: "discord", + OriginatingTo: "channel:C1", + AccountId: "primary", + MessageSid: "1503645939964055592", + }, + }); + const res = await run(); + const payload = Array.isArray(res) ? res[0] : res; + + expect(payload?.isError).toBe(true); + expect(payload?.text).toContain("configured model backend lmstudio/gemma-4-e4b-it"); + expect(payload?.text).toContain("Fallback used openai-codex/gpt-5.5"); + } finally { + fallbackSpy.mockRestore(); + } + }); + + it("does not surface fallback silence when fallback already completed a cron side effect", async () => { + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "NO_REPLY" }], + successfulCronAdds: 1, + meta: {}, + }); + const fallbackSpy = vi + .spyOn(modelFallbackModule, "runWithModelFallback") + .mockImplementationOnce( + async ({ run }: { run: (provider: string, model: string) => Promise }) => ({ + result: await run("openai-codex", "gpt-5.5"), + provider: "openai-codex", + model: "gpt-5.5", + attempts: [ + { + provider: "lmstudio", + model: "gemma-4-e4b-it", + error: "Connection error.", + reason: "timeout", + }, + ], + }), + ); + + try { + const { run } = createMinimalRun({ + runOverrides: { + provider: "lmstudio", + model: "gemma-4-e4b-it", + messageProvider: "discord", + }, + sessionCtx: { + Provider: "discord", + OriginatingChannel: "discord", + OriginatingTo: "channel:C1", + AccountId: "primary", + MessageSid: "1503645939964055592", + }, + }); + + await expect(run()).resolves.toBeUndefined(); + } finally { + fallbackSpy.mockRestore(); + } + }); + + it("does not surface fallback silence when fallback committed target-only messaging delivery", async () => { + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "NO_REPLY" }], + messagingToolSentTargets: [{ tool: "message", provider: "discord", to: "channel:C1" }], + meta: {}, + }); + const fallbackSpy = vi + .spyOn(modelFallbackModule, "runWithModelFallback") + .mockImplementationOnce( + async ({ run }: { run: (provider: string, model: string) => Promise }) => ({ + result: await run("openai-codex", "gpt-5.5"), + provider: "openai-codex", + model: "gpt-5.5", + attempts: [ + { + provider: "lmstudio", + model: "gemma-4-e4b-it", + error: "Connection error.", + reason: "timeout", + }, + ], + }), + ); + + try { + const { run } = createMinimalRun({ + runOverrides: { + provider: "lmstudio", + model: "gemma-4-e4b-it", + messageProvider: "discord", + }, + sessionCtx: { + Provider: "discord", + OriginatingChannel: "discord", + OriginatingTo: "channel:C1", + AccountId: "primary", + MessageSid: "1503645939964055592", + }, + }); + + await expect(run()).resolves.toBeUndefined(); + } finally { + fallbackSpy.mockRestore(); + } + }); + + it("does not surface fallback silence when fallback already delivered an approval prompt", async () => { + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [], + didSendDeterministicApprovalPrompt: true, + meta: {}, + }); + const fallbackSpy = vi + .spyOn(modelFallbackModule, "runWithModelFallback") + .mockImplementationOnce( + async ({ run }: { run: (provider: string, model: string) => Promise }) => ({ + result: await run("openai-codex", "gpt-5.5"), + provider: "openai-codex", + model: "gpt-5.5", + attempts: [ + { + provider: "lmstudio", + model: "gemma-4-e4b-it", + error: "Connection error.", + reason: "timeout", + }, + ], + }), + ); + + try { + const { run } = createMinimalRun({ + runOverrides: { + provider: "lmstudio", + model: "gemma-4-e4b-it", + }, + sessionCtx: { + Provider: "discord", + OriginatingChannel: "discord", + MessageSid: "1503645939964055592", + }, + }); + + await expect(run()).resolves.toBeUndefined(); + } finally { + fallbackSpy.mockRestore(); + } + }); + + it("preserves intentional fallback silence when the turn permits silent replies", async () => { + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "NO_REPLY" }], + meta: {}, + }); + const fallbackSpy = vi + .spyOn(modelFallbackModule, "runWithModelFallback") + .mockImplementationOnce( + async ({ run }: { run: (provider: string, model: string) => Promise }) => ({ + result: await run("openai-codex", "gpt-5.5"), + provider: "openai-codex", + model: "gpt-5.5", + attempts: [ + { + provider: "lmstudio", + model: "gemma-4-e4b-it", + error: "Connection error.", + reason: "timeout", + }, + ], + }), + ); + + try { + const { run } = createMinimalRun({ + runOverrides: { + provider: "lmstudio", + model: "gemma-4-e4b-it", + allowEmptyAssistantReplyAsSilent: true, + }, + sessionCtx: { + Provider: "discord", + OriginatingChannel: "discord", + OriginatingTo: "channel:C1", + ChatType: "channel", + WasMentioned: false, + MessageSid: "1503645939964055592", + }, + }); + + await expect(run()).resolves.toBeUndefined(); + } finally { + fallbackSpy.mockRestore(); + } + }); + it("announces model fallback only once per active fallback state", async () => { const sessionEntry: SessionEntry = { sessionId: "session", diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 27784e8933a..f1beb0386d4 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -112,6 +112,94 @@ function markBeforeAgentRunBlockedPayloads(payloads: ReplyPayload[]): ReplyPaylo ); } +function buildSilentFallbackFailurePayload(params: { + fallbackTransition: ReturnType; + fallbackFailureKnown: boolean; + isHeartbeat: boolean; + hasSuccessfulSideEffectDelivery: boolean; + allowEmptyAssistantReplyAsSilent?: boolean; + silentExpected?: boolean; +}): ReplyPayload | undefined { + if ( + params.isHeartbeat || + params.allowEmptyAssistantReplyAsSilent === true || + params.silentExpected === true || + params.hasSuccessfulSideEffectDelivery || + !params.fallbackTransition.fallbackActive || + !params.fallbackFailureKnown + ) { + return undefined; + } + return markReplyPayloadForSourceSuppressionDelivery({ + text: + `⚠️ I couldn't reach the configured model backend ${params.fallbackTransition.selectedModelRef}. ` + + `Fallback used ${params.fallbackTransition.activeModelRef}, but it produced no visible reply.`, + isError: true, + }); +} + +function hasNonEmptyStringArray(value: unknown): boolean { + return Array.isArray(value) && value.some((entry) => typeof entry === "string" && entry.trim()); +} + +function hasCommittedMessagingTargetDeliveryEvidence(value: unknown): boolean { + if (!Array.isArray(value)) { + return false; + } + return value.some((entry) => { + if (!entry || typeof entry !== "object") { + return false; + } + const record = entry as { text?: unknown; mediaUrls?: unknown }; + if ("text" in record || "mediaUrls" in record) { + return ( + (typeof record.text === "string" && record.text.trim().length > 0) || + hasNonEmptyStringArray(record.mediaUrls) + ); + } + return true; + }); +} + +function hasSuccessfulSideEffectDelivery(params: { + blockReplyPipeline: { didStream: () => boolean; isAborted: () => boolean } | null; + directlySentBlockKeys?: Set; + messagingToolSentTexts?: string[]; + messagingToolSentMediaUrls?: string[]; + messagingToolSentTargets?: unknown[]; + successfulCronAdds?: number; + didSendDeterministicApprovalPrompt?: boolean; +}): boolean { + return ( + (params.blockReplyPipeline?.didStream() && !params.blockReplyPipeline.isAborted()) || + (params.directlySentBlockKeys?.size ?? 0) > 0 || + hasNonEmptyStringArray(params.messagingToolSentTexts) || + hasNonEmptyStringArray(params.messagingToolSentMediaUrls) || + hasCommittedMessagingTargetDeliveryEvidence(params.messagingToolSentTargets) || + (params.successfulCronAdds ?? 0) > 0 || + params.didSendDeterministicApprovalPrompt === true + ); +} + +function resolveConfiguredFallbackModel(params: { + run: FollowupRun["run"]; + fallbackStateEntry?: SessionEntry; +}): { provider: string; model: string; persistedAutoFallback: boolean } { + const entry = params.fallbackStateEntry; + if (entry?.modelOverrideSource === "auto") { + const originProvider = normalizeOptionalString(entry.modelOverrideFallbackOriginProvider); + const originModel = normalizeOptionalString(entry.modelOverrideFallbackOriginModel); + if (originProvider && originModel) { + return { provider: originProvider, model: originModel, persistedAutoFallback: true }; + } + } + return { + provider: params.run.provider, + model: params.run.model, + persistedAutoFallback: false, + }; +} + function buildInlinePluginStatusPayload(params: { entry: SessionEntry | undefined; includeTraceLines: boolean; @@ -1436,10 +1524,14 @@ export async function runReplyAgent(params: { const providerUsed = runResult.meta?.agentMeta?.provider ?? fallbackProvider ?? followupRun.run.provider; const verboseEnabled = resolvedVerboseLevel !== "off"; - const selectedProvider = followupRun.run.provider; - const selectedModel = followupRun.run.model; const fallbackStateEntry = activeSessionEntry ?? (sessionKey ? activeSessionStore?.[sessionKey] : undefined); + const configuredFallbackModel = resolveConfiguredFallbackModel({ + run: followupRun.run, + fallbackStateEntry, + }); + const selectedProvider = configuredFallbackModel.provider; + const selectedModel = configuredFallbackModel.model; const fallbackTransition = resolveFallbackTransition({ selectedProvider, selectedModel, @@ -1512,10 +1604,45 @@ export async function runReplyAgent(params: { cliSessionBinding, }); + const returnSilentFallbackFailureIfNeeded = async (): Promise => { + const silentFallbackFailurePayload = buildSilentFallbackFailurePayload({ + fallbackTransition, + fallbackFailureKnown: + fallbackAttempts.length > 0 || configuredFallbackModel.persistedAutoFallback, + isHeartbeat, + hasSuccessfulSideEffectDelivery: hasSuccessfulSideEffectDelivery({ + blockReplyPipeline, + directlySentBlockKeys, + messagingToolSentTexts: runResult.messagingToolSentTexts, + messagingToolSentMediaUrls: runResult.messagingToolSentMediaUrls, + messagingToolSentTargets: runResult.messagingToolSentTargets, + successfulCronAdds: runResult.successfulCronAdds, + didSendDeterministicApprovalPrompt: runResult.didSendDeterministicApprovalPrompt, + }), + allowEmptyAssistantReplyAsSilent: followupRun.run.allowEmptyAssistantReplyAsSilent, + silentExpected: followupRun.run.silentExpected, + }); + if (!silentFallbackFailurePayload) { + return undefined; + } + replyOperation.fail( + "run_failed", + new Error( + `configured model backend ${fallbackTransition.selectedModelRef} failed and fallback ${fallbackTransition.activeModelRef} produced no visible reply`, + ), + ); + await signalTypingIfNeeded([silentFallbackFailurePayload], typingSignals); + return returnWithQueuedFollowupDrain(silentFallbackFailurePayload); + }; + // Drain any late tool/block deliveries before deciding there's "nothing to send". // Otherwise, a late typing trigger (e.g. from a tool callback) can outlive the run and // keep the typing indicator stuck. if (payloadArray.length === 0) { + const silentFallbackFailurePayload = await returnSilentFallbackFailureIfNeeded(); + if (silentFallbackFailurePayload) { + return silentFallbackFailurePayload; + } return returnWithQueuedFollowupDrain(undefined); } @@ -1548,6 +1675,10 @@ export async function runReplyAgent(params: { didLogHeartbeatStrip = payloadResult.didLogHeartbeatStrip; if (replyPayloads.length === 0) { + const silentFallbackFailurePayload = await returnSilentFallbackFailureIfNeeded(); + if (silentFallbackFailurePayload) { + return silentFallbackFailurePayload; + } return returnWithQueuedFollowupDrain(undefined); }