fix: surface silent model fallback failures (#80917)

Merged via squash.

Prepared head SHA: 59be6e2db5
Co-authored-by: dutifulbob <261991368+dutifulbob@users.noreply.github.com>
Co-authored-by: osolmaz <2453968+osolmaz@users.noreply.github.com>
Reviewed-by: @osolmaz
This commit is contained in:
Bob
2026-05-12 14:19:10 +02:00
committed by GitHub
parent 9c125e821f
commit fe79efc31b
6 changed files with 566 additions and 2 deletions

View File

@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
- memory-wiki: require write scope for Obsidian search [AI]. (#80904) Thanks @pgondhi987.
- Build: skip copied metadata for bundled plugins that are excluded from build entries, preventing update/status rebuilds from advertising missing QQ Bot runtime files. (#80925)
- Control UI/sessions: nest subagent sessions under their parent session in the session picker dropdown using a visual `└─ ` prefix, making the parent-child relationship clear. Fixes #77628. (#78623) Thanks @chinar-amrutkar.
- Auto-reply: surface a visible error when the configured model backend fails and fallback produces no visible reply, while preserving intentional silent turns and side-effect-only deliveries. (#80917) Thanks @dutifulbob.
### Changes

View File

@@ -630,6 +630,20 @@ describe("overflow compaction in run loop", () => {
expect(result.messagingToolSentTexts).toEqual(["already delivered"]);
});
// Verifies that the `didSendDeterministicApprovalPrompt` flag reported by an attempt is
// carried onto the run result, even though the attempt produced no assistant text.
it("propagates deterministic approval prompt delivery from attempts", async () => {
// The attempt reports an approval prompt delivery but contributes no assistant texts.
mockedRunEmbeddedAttempt.mockResolvedValue(
makeAttemptResult({
assistantTexts: [],
didSendDeterministicApprovalPrompt: true,
}),
);
const result = await runEmbeddedPiAgent(baseParams);
// With no assistant text the run exposes no payloads, yet the flag must still be true.
expect(result.payloads).toBeUndefined();
expect(result.didSendDeterministicApprovalPrompt).toBe(true);
});
it("returns a timeout payload instead of a partial assistant fragment after stream timeout", async () => {
mockedRunEmbeddedAttempt.mockResolvedValue(
makeAttemptResult({

View File

@@ -2692,6 +2692,7 @@ export async function runEmbeddedAttempt(
getSuccessfulCronAdds,
getReplayState,
didSendViaMessagingTool,
didSendDeterministicApprovalPrompt,
getLastToolError,
setTerminalLifecycleMeta,
getUsageTotals,
@@ -4086,6 +4087,7 @@ export async function runEmbeddedAttempt(
currentAttemptAssistant,
lastToolError: getLastToolError?.(),
didSendViaMessagingTool: didSendViaMessagingTool(),
didSendDeterministicApprovalPrompt: didSendDeterministicApprovalPrompt(),
messagingToolSentTexts: getMessagingToolSentTexts(),
messagingToolSentMediaUrls: getMessagingToolSentMediaUrls(),
messagingToolSentTargets: getMessagingToolSentTargets(),

View File

@@ -177,6 +177,8 @@ export type EmbeddedPiRunResult = {
// True if a messaging tool successfully sent a message.
// Used to suppress agent's confirmation text.
didSendViaMessagingTool?: boolean;
// True if a deterministic approval prompt was sent through the tool-result channel.
didSendDeterministicApprovalPrompt?: boolean;
// Texts successfully sent via messaging tools during the run.
messagingToolSentTexts?: string[];
// Media URLs successfully sent via messaging tools during the run.

View File

@@ -931,6 +931,420 @@ describe("runReplyAgent typing (heartbeat)", () => {
}
});
// Scenario: the configured model (lmstudio/gemma-4-e4b-it) has a recorded failed attempt, the
// fallback model (openai-codex/gpt-5.5) runs, and the embedded agent returns only a "NO_REPLY"
// payload. The reply agent is expected to return an error payload naming both models.
it("surfaces a configured backend failure when fallback produces no visible reply", async () => {
// The embedded agent result consists solely of a "NO_REPLY" text payload.
state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "NO_REPLY" }],
meta: {},
});
// Replace runWithModelFallback for one call: execute the run callback as the fallback model and
// report a single failed attempt against the configured model.
const fallbackSpy = vi
.spyOn(modelFallbackModule, "runWithModelFallback")
.mockImplementationOnce(
async ({ run }: { run: (provider: string, model: string) => Promise<unknown> }) => ({
result: await run("openai-codex", "gpt-5.5"),
provider: "openai-codex",
model: "gpt-5.5",
attempts: [
{
provider: "lmstudio",
model: "gemma-4-e4b-it",
error: "Connection error.",
reason: "timeout",
},
],
}),
);
try {
const { run } = createMinimalRun({
runOverrides: {
provider: "lmstudio",
model: "gemma-4-e4b-it",
},
sessionCtx: {
Provider: "discord",
OriginatingChannel: "discord",
MessageSid: "1503645939964055592",
},
});
const res = await run();
// run() may resolve to a single payload or an array; inspect the first payload either way.
const payload = Array.isArray(res) ? res[0] : res;
expect(payload?.isError).toBe(true);
expect(payload?.text).toContain("configured model backend lmstudio/gemma-4-e4b-it");
expect(payload?.text).toContain("Fallback used openai-codex/gpt-5.5");
expect(payload?.text).toContain("no visible reply");
} finally {
// Always restore the spy so later tests see the real runWithModelFallback.
fallbackSpy.mockRestore();
}
});
// Same fallback scenario as the "NO_REPLY" case, except the embedded agent returns an empty
// payload list. The same error payload is expected.
it("surfaces a configured backend failure when fallback returns no payloads", async () => {
// The embedded agent result has no payloads at all.
state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [],
meta: {},
});
// Replace runWithModelFallback for one call: execute the run callback as the fallback model and
// report a single failed attempt against the configured model.
const fallbackSpy = vi
.spyOn(modelFallbackModule, "runWithModelFallback")
.mockImplementationOnce(
async ({ run }: { run: (provider: string, model: string) => Promise<unknown> }) => ({
result: await run("openai-codex", "gpt-5.5"),
provider: "openai-codex",
model: "gpt-5.5",
attempts: [
{
provider: "lmstudio",
model: "gemma-4-e4b-it",
error: "Connection error.",
reason: "timeout",
},
],
}),
);
try {
const { run } = createMinimalRun({
runOverrides: {
provider: "lmstudio",
model: "gemma-4-e4b-it",
},
sessionCtx: {
Provider: "discord",
OriginatingChannel: "discord",
MessageSid: "1503645939964055592",
},
});
const res = await run();
// run() may resolve to a single payload or an array; inspect the first payload either way.
const payload = Array.isArray(res) ? res[0] : res;
expect(payload?.isError).toBe(true);
expect(payload?.text).toContain("configured model backend lmstudio/gemma-4-e4b-it");
expect(payload?.text).toContain("Fallback used openai-codex/gpt-5.5");
expect(payload?.text).toContain("no visible reply");
} finally {
// Always restore the spy so later tests see the real runWithModelFallback.
fallbackSpy.mockRestore();
}
});
// Scenario: no fallback attempt is mocked for this turn. Instead the session entry already
// records an "auto" model override to openai-codex/gpt-5.5 whose fallback origin is
// lmstudio/gemma-4-e4b-it. A "NO_REPLY" result must still produce the error payload, and the
// message must name the persisted origin model as the configured backend.
it("surfaces a persisted configured backend failure when the active fallback is silent", async () => {
// Session state describing an automatic override plus the model it fell back from.
const sessionEntry: SessionEntry = {
sessionId: "session",
updatedAt: Date.now(),
providerOverride: "openai-codex",
modelOverride: "gpt-5.5",
modelOverrideSource: "auto",
modelOverrideFallbackOriginProvider: "lmstudio",
modelOverrideFallbackOriginModel: "gemma-4-e4b-it",
};
const sessionStore = { main: sessionEntry };
state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "NO_REPLY" }],
meta: {},
});
// The run itself is configured with the fallback model, matching the persisted override.
const { run } = createMinimalRun({
runOverrides: {
provider: "openai-codex",
model: "gpt-5.5",
},
sessionEntry,
sessionStore,
sessionCtx: {
Provider: "discord",
OriginatingChannel: "discord",
MessageSid: "1503677587568722061",
},
});
const res = await run();
// run() may resolve to a single payload or an array; inspect the first payload either way.
const payload = Array.isArray(res) ? res[0] : res;
expect(payload?.isError).toBe(true);
expect(payload?.text).toContain("configured model backend lmstudio/gemma-4-e4b-it");
expect(payload?.text).toContain("Fallback used openai-codex/gpt-5.5");
expect(payload?.text).toContain("no visible reply");
});
// Scenario: the fallback run reports that a messaging tool already sent the text "already sent"
// to channel:C1 on discord. run() is expected to resolve to undefined, i.e. no fallback-failure
// error payload is returned.
it("does not surface fallback silence when fallback already replied through a messaging tool", async () => {
// The payload text matches the text recorded as sent through the messaging tool.
state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "already sent" }],
messagingToolSentTexts: ["already sent"],
messagingToolSentTargets: [{ tool: "message", provider: "discord", to: "channel:C1" }],
meta: {},
});
// Replace runWithModelFallback for one call: execute the run callback as the fallback model and
// report a single failed attempt against the configured model.
const fallbackSpy = vi
.spyOn(modelFallbackModule, "runWithModelFallback")
.mockImplementationOnce(
async ({ run }: { run: (provider: string, model: string) => Promise<unknown> }) => ({
result: await run("openai-codex", "gpt-5.5"),
provider: "openai-codex",
model: "gpt-5.5",
attempts: [
{
provider: "lmstudio",
model: "gemma-4-e4b-it",
error: "Connection error.",
reason: "timeout",
},
],
}),
);
try {
// The session context targets the same provider/channel the messaging tool delivered to.
const { run } = createMinimalRun({
runOverrides: {
provider: "lmstudio",
model: "gemma-4-e4b-it",
messageProvider: "discord",
},
sessionCtx: {
Provider: "discord",
OriginatingChannel: "discord",
OriginatingTo: "channel:C1",
AccountId: "primary",
MessageSid: "1503645939964055592",
},
});
await expect(run()).resolves.toBeUndefined();
} finally {
// Always restore the spy so later tests see the real runWithModelFallback.
fallbackSpy.mockRestore();
}
});
// Scenario: the fallback run reports messaging-tool "deliveries" whose sent text, media URL and
// target text are all whitespace. These must not count as a successful delivery, so the
// fallback-failure error payload is still expected.
it("does not treat whitespace-only messaging evidence as fallback delivery", async () => {
// Every piece of delivery evidence below is blank: a space, a tab, and a target with text " ".
state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "NO_REPLY" }],
messagingToolSentTexts: [" "],
messagingToolSentMediaUrls: ["\t"],
messagingToolSentTargets: [
{ tool: "message", provider: "discord", to: "channel:C1", text: " " },
],
meta: {},
});
// Replace runWithModelFallback for one call: execute the run callback as the fallback model and
// report a single failed attempt against the configured model.
const fallbackSpy = vi
.spyOn(modelFallbackModule, "runWithModelFallback")
.mockImplementationOnce(
async ({ run }: { run: (provider: string, model: string) => Promise<unknown> }) => ({
result: await run("openai-codex", "gpt-5.5"),
provider: "openai-codex",
model: "gpt-5.5",
attempts: [
{
provider: "lmstudio",
model: "gemma-4-e4b-it",
error: "Connection error.",
reason: "timeout",
},
],
}),
);
try {
const { run } = createMinimalRun({
runOverrides: {
provider: "lmstudio",
model: "gemma-4-e4b-it",
messageProvider: "discord",
},
sessionCtx: {
Provider: "discord",
OriginatingChannel: "discord",
OriginatingTo: "channel:C1",
AccountId: "primary",
MessageSid: "1503645939964055592",
},
});
const res = await run();
// run() may resolve to a single payload or an array; inspect the first payload either way.
const payload = Array.isArray(res) ? res[0] : res;
expect(payload?.isError).toBe(true);
expect(payload?.text).toContain("configured model backend lmstudio/gemma-4-e4b-it");
expect(payload?.text).toContain("Fallback used openai-codex/gpt-5.5");
} finally {
// Always restore the spy so later tests see the real runWithModelFallback.
fallbackSpy.mockRestore();
}
});
// Scenario: the fallback run returns only "NO_REPLY" but reports one successful cron add.
// That side effect counts as a delivery, so run() is expected to resolve to undefined instead
// of returning the fallback-failure error payload.
it("does not surface fallback silence when fallback already completed a cron side effect", async () => {
// successfulCronAdds > 0 is the only delivery evidence in this result.
state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "NO_REPLY" }],
successfulCronAdds: 1,
meta: {},
});
// Replace runWithModelFallback for one call: execute the run callback as the fallback model and
// report a single failed attempt against the configured model.
const fallbackSpy = vi
.spyOn(modelFallbackModule, "runWithModelFallback")
.mockImplementationOnce(
async ({ run }: { run: (provider: string, model: string) => Promise<unknown> }) => ({
result: await run("openai-codex", "gpt-5.5"),
provider: "openai-codex",
model: "gpt-5.5",
attempts: [
{
provider: "lmstudio",
model: "gemma-4-e4b-it",
error: "Connection error.",
reason: "timeout",
},
],
}),
);
try {
const { run } = createMinimalRun({
runOverrides: {
provider: "lmstudio",
model: "gemma-4-e4b-it",
messageProvider: "discord",
},
sessionCtx: {
Provider: "discord",
OriginatingChannel: "discord",
OriginatingTo: "channel:C1",
AccountId: "primary",
MessageSid: "1503645939964055592",
},
});
await expect(run()).resolves.toBeUndefined();
} finally {
// Always restore the spy so later tests see the real runWithModelFallback.
fallbackSpy.mockRestore();
}
});
// Scenario: the fallback run returns only "NO_REPLY" and records a messaging target that has
// neither a `text` nor a `mediaUrls` field. Such a target-only record still counts as a
// committed delivery, so run() is expected to resolve to undefined.
it("does not surface fallback silence when fallback committed target-only messaging delivery", async () => {
// The target entry carries routing fields only (tool/provider/to), no content fields.
state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "NO_REPLY" }],
messagingToolSentTargets: [{ tool: "message", provider: "discord", to: "channel:C1" }],
meta: {},
});
// Replace runWithModelFallback for one call: execute the run callback as the fallback model and
// report a single failed attempt against the configured model.
const fallbackSpy = vi
.spyOn(modelFallbackModule, "runWithModelFallback")
.mockImplementationOnce(
async ({ run }: { run: (provider: string, model: string) => Promise<unknown> }) => ({
result: await run("openai-codex", "gpt-5.5"),
provider: "openai-codex",
model: "gpt-5.5",
attempts: [
{
provider: "lmstudio",
model: "gemma-4-e4b-it",
error: "Connection error.",
reason: "timeout",
},
],
}),
);
try {
const { run } = createMinimalRun({
runOverrides: {
provider: "lmstudio",
model: "gemma-4-e4b-it",
messageProvider: "discord",
},
sessionCtx: {
Provider: "discord",
OriginatingChannel: "discord",
OriginatingTo: "channel:C1",
AccountId: "primary",
MessageSid: "1503645939964055592",
},
});
await expect(run()).resolves.toBeUndefined();
} finally {
// Always restore the spy so later tests see the real runWithModelFallback.
fallbackSpy.mockRestore();
}
});
// Scenario: the fallback run returns no payloads but reports that a deterministic approval
// prompt was sent. That counts as a delivery, so run() is expected to resolve to undefined
// instead of returning the fallback-failure error payload.
it("does not surface fallback silence when fallback already delivered an approval prompt", async () => {
// didSendDeterministicApprovalPrompt is the only delivery evidence in this result.
state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [],
didSendDeterministicApprovalPrompt: true,
meta: {},
});
// Replace runWithModelFallback for one call: execute the run callback as the fallback model and
// report a single failed attempt against the configured model.
const fallbackSpy = vi
.spyOn(modelFallbackModule, "runWithModelFallback")
.mockImplementationOnce(
async ({ run }: { run: (provider: string, model: string) => Promise<unknown> }) => ({
result: await run("openai-codex", "gpt-5.5"),
provider: "openai-codex",
model: "gpt-5.5",
attempts: [
{
provider: "lmstudio",
model: "gemma-4-e4b-it",
error: "Connection error.",
reason: "timeout",
},
],
}),
);
try {
const { run } = createMinimalRun({
runOverrides: {
provider: "lmstudio",
model: "gemma-4-e4b-it",
},
sessionCtx: {
Provider: "discord",
OriginatingChannel: "discord",
MessageSid: "1503645939964055592",
},
});
await expect(run()).resolves.toBeUndefined();
} finally {
// Always restore the spy so later tests see the real runWithModelFallback.
fallbackSpy.mockRestore();
}
});
// Scenario: the fallback run returns only "NO_REPLY", but the run is configured with
// `allowEmptyAssistantReplyAsSilent: true`. Silence is therefore permitted, and run() is
// expected to resolve to undefined rather than the fallback-failure error payload.
it("preserves intentional fallback silence when the turn permits silent replies", async () => {
state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "NO_REPLY" }],
meta: {},
});
// Replace runWithModelFallback for one call: execute the run callback as the fallback model and
// report a single failed attempt against the configured model.
const fallbackSpy = vi
.spyOn(modelFallbackModule, "runWithModelFallback")
.mockImplementationOnce(
async ({ run }: { run: (provider: string, model: string) => Promise<unknown> }) => ({
result: await run("openai-codex", "gpt-5.5"),
provider: "openai-codex",
model: "gpt-5.5",
attempts: [
{
provider: "lmstudio",
model: "gemma-4-e4b-it",
error: "Connection error.",
reason: "timeout",
},
],
}),
);
try {
// The session context describes a channel message in which the bot was not mentioned.
const { run } = createMinimalRun({
runOverrides: {
provider: "lmstudio",
model: "gemma-4-e4b-it",
allowEmptyAssistantReplyAsSilent: true,
},
sessionCtx: {
Provider: "discord",
OriginatingChannel: "discord",
OriginatingTo: "channel:C1",
ChatType: "channel",
WasMentioned: false,
MessageSid: "1503645939964055592",
},
});
await expect(run()).resolves.toBeUndefined();
} finally {
// Always restore the spy so later tests see the real runWithModelFallback.
fallbackSpy.mockRestore();
}
});
it("announces model fallback only once per active fallback state", async () => {
const sessionEntry: SessionEntry = {
sessionId: "session",

View File

@@ -112,6 +112,94 @@ function markBeforeAgentRunBlockedPayloads(payloads: ReplyPayload[]): ReplyPaylo
);
}
/**
 * Builds the error payload shown when the configured model backend failed and the fallback
 * model finished without anything visible to deliver.
 *
 * @param params.fallbackTransition Result of `resolveFallbackTransition`; supplies
 *   `fallbackActive`, `selectedModelRef` and `activeModelRef`.
 * @param params.fallbackFailureKnown Whether the caller has evidence that the configured model
 *   actually failed.
 * @param params.isHeartbeat Heartbeat turns never produce this payload.
 * @param params.hasSuccessfulSideEffectDelivery Whether the turn already delivered something by
 *   another route.
 * @param params.allowEmptyAssistantReplyAsSilent When `true`, an empty reply is acceptable.
 * @param params.silentExpected When `true`, silence is the expected outcome.
 * @returns The error payload, or `undefined` when the silence should not be reported.
 */
function buildSilentFallbackFailurePayload(params: {
  fallbackTransition: ReturnType<typeof resolveFallbackTransition>;
  fallbackFailureKnown: boolean;
  isHeartbeat: boolean;
  hasSuccessfulSideEffectDelivery: boolean;
  allowEmptyAssistantReplyAsSilent?: boolean;
  silentExpected?: boolean;
}): ReplyPayload | undefined {
  // Heartbeat turns are exempt.
  if (params.isHeartbeat) {
    return undefined;
  }

  // The turn was allowed (or expected) to stay silent.
  const silenceIsIntentional =
    params.allowEmptyAssistantReplyAsSilent === true || params.silentExpected === true;
  if (silenceIsIntentional) {
    return undefined;
  }

  // Something already reached the user through another channel.
  if (params.hasSuccessfulSideEffectDelivery) {
    return undefined;
  }

  // Only report when a fallback is active and the configured backend is known to have failed.
  const transition = params.fallbackTransition;
  if (!(transition.fallbackActive && params.fallbackFailureKnown)) {
    return undefined;
  }

  const unreachableNotice = `⚠️ I couldn't reach the configured model backend ${transition.selectedModelRef}. `;
  const fallbackNotice = `Fallback used ${transition.activeModelRef}, but it produced no visible reply.`;
  return markReplyPayloadForSourceSuppressionDelivery({
    text: unreachableNotice + fallbackNotice,
    isError: true,
  });
}
/**
 * Reports whether `value` is an array holding at least one string that is non-empty after
 * trimming whitespace. Non-array inputs and non-string entries never count.
 */
function hasNonEmptyStringArray(value: unknown): boolean {
  if (!Array.isArray(value)) {
    return false;
  }
  for (const item of value) {
    if (typeof item === "string" && item.trim() !== "") {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether a list of messaging-tool target records contains evidence of a committed
 * delivery.
 *
 * A record counts when it is a non-null object and either:
 * - it has neither a `text` nor a `mediaUrls` key (a target-only record), or
 * - its `text` is a string with non-whitespace content, or its `mediaUrls` contains at least
 *   one non-blank string.
 *
 * Non-array inputs yield `false`.
 */
function hasCommittedMessagingTargetDeliveryEvidence(value: unknown): boolean {
  if (!Array.isArray(value)) {
    return false;
  }
  for (const candidate of value) {
    if (!candidate || typeof candidate !== "object") {
      continue;
    }
    const target = candidate as { text?: unknown; mediaUrls?: unknown };
    const declaresContent = "text" in target || "mediaUrls" in target;
    if (!declaresContent) {
      // Target-only record: no content fields to validate.
      return true;
    }
    const hasVisibleText = typeof target.text === "string" && target.text.trim().length > 0;
    if (hasVisibleText || hasNonEmptyStringArray(target.mediaUrls)) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether the turn already delivered something by a route other than the final reply
 * payloads. The checks run in order and stop at the first hit:
 *
 * 1. the block reply pipeline streamed and was not aborted,
 * 2. at least one block key was sent directly,
 * 3. a messaging tool sent non-blank text,
 * 4. a messaging tool sent a non-blank media URL,
 * 5. a messaging tool target record shows committed delivery,
 * 6. at least one cron add succeeded,
 * 7. a deterministic approval prompt was sent.
 */
function hasSuccessfulSideEffectDelivery(params: {
  blockReplyPipeline: { didStream: () => boolean; isAborted: () => boolean } | null;
  directlySentBlockKeys?: Set<string>;
  messagingToolSentTexts?: string[];
  messagingToolSentMediaUrls?: string[];
  messagingToolSentTargets?: unknown[];
  successfulCronAdds?: number;
  didSendDeterministicApprovalPrompt?: boolean;
}): boolean {
  const pipeline = params.blockReplyPipeline;
  if (pipeline?.didStream() && !pipeline.isAborted()) {
    return true;
  }

  const directBlockCount = params.directlySentBlockKeys?.size ?? 0;
  if (directBlockCount > 0) {
    return true;
  }

  if (hasNonEmptyStringArray(params.messagingToolSentTexts)) {
    return true;
  }
  if (hasNonEmptyStringArray(params.messagingToolSentMediaUrls)) {
    return true;
  }
  if (hasCommittedMessagingTargetDeliveryEvidence(params.messagingToolSentTargets)) {
    return true;
  }

  const cronAddCount = params.successfulCronAdds ?? 0;
  if (cronAddCount > 0) {
    return true;
  }

  return params.didSendDeterministicApprovalPrompt === true;
}
/**
 * Determines which provider/model should be treated as the configured model when evaluating
 * fallback state.
 *
 * If the session entry records an `"auto"` model override and both fallback-origin fields
 * normalize to a value, that origin is returned with `persistedAutoFallback: true`. Otherwise
 * the run's own provider/model is returned with `persistedAutoFallback: false`.
 *
 * @param params.run The run whose `provider` and `model` serve as the default answer.
 * @param params.fallbackStateEntry Optional session entry holding override metadata.
 */
function resolveConfiguredFallbackModel(params: {
  run: FollowupRun["run"];
  fallbackStateEntry?: SessionEntry;
}): { provider: string; model: string; persistedAutoFallback: boolean } {
  const fromRun = () => ({
    provider: params.run.provider,
    model: params.run.model,
    persistedAutoFallback: false,
  });

  const sessionState = params.fallbackStateEntry;
  if (sessionState?.modelOverrideSource !== "auto") {
    return fromRun();
  }

  const provider = normalizeOptionalString(sessionState.modelOverrideFallbackOriginProvider);
  const model = normalizeOptionalString(sessionState.modelOverrideFallbackOriginModel);
  // Both halves of the origin are required; a partial record falls back to the run's model.
  return provider && model ? { provider, model, persistedAutoFallback: true } : fromRun();
}
function buildInlinePluginStatusPayload(params: {
entry: SessionEntry | undefined;
includeTraceLines: boolean;
@@ -1436,10 +1524,14 @@ export async function runReplyAgent(params: {
const providerUsed =
runResult.meta?.agentMeta?.provider ?? fallbackProvider ?? followupRun.run.provider;
const verboseEnabled = resolvedVerboseLevel !== "off";
const selectedProvider = followupRun.run.provider;
const selectedModel = followupRun.run.model;
const fallbackStateEntry =
activeSessionEntry ?? (sessionKey ? activeSessionStore?.[sessionKey] : undefined);
const configuredFallbackModel = resolveConfiguredFallbackModel({
run: followupRun.run,
fallbackStateEntry,
});
const selectedProvider = configuredFallbackModel.provider;
const selectedModel = configuredFallbackModel.model;
const fallbackTransition = resolveFallbackTransition({
selectedProvider,
selectedModel,
@@ -1512,10 +1604,45 @@ export async function runReplyAgent(params: {
cliSessionBinding,
});
// Shared exit path for the "nothing to send" branches of this function.
// Builds the silent-fallback error payload from the current run state; if one applies, marks
// the reply operation as failed, signals typing for the payload, and returns the payload via
// returnWithQueuedFollowupDrain. Resolves to undefined when no error payload applies, leaving
// the caller to take its normal empty-reply path.
const returnSilentFallbackFailureIfNeeded = async (): Promise<ReplyPayload | undefined> => {
const silentFallbackFailurePayload = buildSilentFallbackFailurePayload({
fallbackTransition,
// The failure is "known" if this turn recorded fallback attempts, or if the configured model
// was recovered from a persisted auto-fallback override.
fallbackFailureKnown:
fallbackAttempts.length > 0 || configuredFallbackModel.persistedAutoFallback,
isHeartbeat,
// Any delivery made outside the final payloads suppresses the error.
hasSuccessfulSideEffectDelivery: hasSuccessfulSideEffectDelivery({
blockReplyPipeline,
directlySentBlockKeys,
messagingToolSentTexts: runResult.messagingToolSentTexts,
messagingToolSentMediaUrls: runResult.messagingToolSentMediaUrls,
messagingToolSentTargets: runResult.messagingToolSentTargets,
successfulCronAdds: runResult.successfulCronAdds,
didSendDeterministicApprovalPrompt: runResult.didSendDeterministicApprovalPrompt,
}),
allowEmptyAssistantReplyAsSilent: followupRun.run.allowEmptyAssistantReplyAsSilent,
silentExpected: followupRun.run.silentExpected,
});
if (!silentFallbackFailurePayload) {
return undefined;
}
// Record the failure on the reply operation before the payload is handed back.
replyOperation.fail(
"run_failed",
new Error(
`configured model backend ${fallbackTransition.selectedModelRef} failed and fallback ${fallbackTransition.activeModelRef} produced no visible reply`,
),
);
await signalTypingIfNeeded([silentFallbackFailurePayload], typingSignals);
return returnWithQueuedFollowupDrain(silentFallbackFailurePayload);
};
// Drain any late tool/block deliveries before deciding there's "nothing to send".
// Otherwise, a late typing trigger (e.g. from a tool callback) can outlive the run and
// keep the typing indicator stuck.
if (payloadArray.length === 0) {
const silentFallbackFailurePayload = await returnSilentFallbackFailureIfNeeded();
if (silentFallbackFailurePayload) {
return silentFallbackFailurePayload;
}
return returnWithQueuedFollowupDrain(undefined);
}
@@ -1548,6 +1675,10 @@ export async function runReplyAgent(params: {
didLogHeartbeatStrip = payloadResult.didLogHeartbeatStrip;
if (replyPayloads.length === 0) {
const silentFallbackFailurePayload = await returnSilentFallbackFailureIfNeeded();
if (silentFallbackFailurePayload) {
return silentFallbackFailurePayload;
}
return returnWithQueuedFollowupDrain(undefined);
}