fix: deliver media-bearing block replies without streaming

This commit is contained in:
Clawdbot
2026-05-06 17:53:40 +10:00
parent 0b88d6286c
commit 5dd5fba730
3 changed files with 53 additions and 6 deletions

View File

@@ -140,6 +140,7 @@ Docs: https://docs.openclaw.ai
- Agents/context engines: keep hidden OpenClaw runtime-context custom messages out of context-engine assemble, afterTurn, and ingest hooks so transcript reconstruction plugins only see conversation messages. Thanks @vincentkoc.
- Network/runtime: avoid importing Undici's package dispatcher during no-proxy timeout bootstrap so external channel plugin fetch requests with explicit Content-Length keep working. Fixes #78007. Thanks @shakkernerd.
- Gateway/shutdown: cancel delayed post-ready maintenance during close and suppress maintenance/cron startup after quick restarts, preventing orphaned background timers. Thanks @vincentkoc.
- Agents/TTS: send media-bearing block replies directly when block streaming is off, so agent `tts` tool audio attached to a final text reply is delivered instead of being consumed before final Telegram/media delivery. Thanks @Conan-Scott.
- Agents/generated media: treat attachment-style message tool actions as completed chat sends, preventing duplicate fallback media posts when generated files were already uploaded.
- Control UI/sessions: show each session's agent runtime in the Sessions table and allow filtering by runtime labels, matching the Agents panel runtime wording. Thanks @vincentkoc.
- Discord/streaming: show live reasoning text in progress drafts instead of a bare `Reasoning` status line.

View File

@@ -13,7 +13,7 @@ type BlockReplyPipelineLike = NonNullable<
>;
describe("createBlockReplyDeliveryHandler", () => {
it("keeps captioned media-bearing block replies buffered when block streaming is disabled", async () => {
it("sends captioned media-bearing block replies when block streaming is disabled", async () => {
const onBlockReply = vi.fn(async () => {});
const normalizeStreamingText = vi.fn((payload: { text?: string }) => ({
text: payload.text,
@@ -40,11 +40,57 @@ describe("createBlockReplyDeliveryHandler", () => {
replyToCurrent: true,
});
expect(onBlockReply).not.toHaveBeenCalled();
expect(directlySentBlockKeys).toEqual(new Set());
const expectedPayload = {
text: "here's the vibe",
mediaUrl: "/tmp/generated.png",
mediaUrls: ["/tmp/generated.png"],
replyToCurrent: true,
replyToId: undefined,
replyToTag: undefined,
audioAsVoice: false,
};
expect(onBlockReply).toHaveBeenCalledWith(expectedPayload);
expect(directlySentBlockKeys).toEqual(new Set([createBlockReplyContentKey(expectedPayload)]));
expect(typingSignals.signalTextDelta).toHaveBeenCalledWith("here's the vibe");
});
// Covers the text + voice-audio case: with block streaming disabled and no pipeline,
// a block reply that carries both a caption and media must be handed to onBlockReply
// directly and recorded in directlySentBlockKeys.
it("sends captioned audio-as-voice block replies when block streaming is disabled", async () => {
const onBlockReply = vi.fn(async () => {});
const directlySentBlockKeys = new Set<string>();
const handler = createBlockReplyDeliveryHandler({
onBlockReply,
// Pass-through stubs: text is never skipped and the reply-to mode leaves the payload untouched.
normalizeStreamingText: (payload) => ({ text: payload.text, skip: false }),
applyReplyToMode: (payload) => payload,
typingSignals: {
signalTextDelta: vi.fn(async () => {}),
} as unknown as TypingSignaler,
// Streaming off and no pipeline: this is the configuration under test.
blockStreamingEnabled: false,
blockReplyPipeline: null,
directlySentBlockKeys,
});
// Input has a caption, a single media URL, and the voice flag set.
await handler({
text: "spoken confirmation",
mediaUrls: ["/tmp/voice.opus"],
audioAsVoice: true,
});
// Expected delivery shape: mediaUrl mirrors the single mediaUrls entry, the reply-to
// fields are present but undefined, and audioAsVoice is preserved as true.
const expectedPayload = {
text: "spoken confirmation",
mediaUrl: "/tmp/voice.opus",
mediaUrls: ["/tmp/voice.opus"],
replyToId: undefined,
replyToCurrent: undefined,
replyToTag: undefined,
audioAsVoice: true,
};
expect(onBlockReply).toHaveBeenCalledWith(expectedPayload);
// Exactly one key is expected in the set: the content key derived from the payload above.
expect(directlySentBlockKeys).toEqual(new Set([createBlockReplyContentKey(expectedPayload)]));
});
it("sends media-only block replies when block streaming is disabled", async () => {
const onBlockReply = vi.fn(async () => {});
const directlySentBlockKeys = new Set<string>();

View File

@@ -157,9 +157,9 @@ export function createBlockReplyDeliveryHandler(params: {
trackingPayload: blockPayload,
payload: blockPayload,
});
} else if (blockHasMedia && !blockPayload.text) {
// Media-only block replies (for example orphaned tool attachments) are not reconstructible
// from the assistant's final text, so they still need a direct fallback when streaming is off.
} else if (blockHasMedia) {
// Media-bearing block replies (including text+media tool attachments) are not reliably
// reconstructible from the assistant's final text, so send them directly when streaming is off.
await sendDirectBlockReply({
onBlockReply: params.onBlockReply,
directlySentBlockKeys: params.directlySentBlockKeys,