diff --git a/CHANGELOG.md b/CHANGELOG.md index bfcee5a2566..e6f196558a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,6 +62,7 @@ Docs: https://docs.openclaw.ai - Gateway/Webchat NO_REPLY streaming: suppress assistant lead-fragment deltas that are prefixes of `NO_REPLY` and keep final-message buffering in sync, preventing partial `NO` leaks on silent-response runs while preserving legitimate short replies. (#32073) Thanks @liuxiaopai-ai. - Tools/fsPolicy propagation: honor `tools.fs.workspaceOnly` for image/pdf local-root allowlists so non-sandbox media paths outside workspace are rejected when workspace-only mode is enabled. (#31882) Thanks @justinhuangcode. - Daemon/Homebrew runtime pinning: resolve Homebrew Cellar Node paths to stable Homebrew-managed symlinks (including versioned formulas like `node@22`) so gateway installs keep the intended runtime across brew upgrades. (#32185) Thanks @scoootscooob. +- Discord/audio preflight mentions: detect audio attachments via Discord `content_type` and gate preflight transcription on typed text (not media placeholders), so guild voice-note mentions are transcribed and matched correctly. (#32136) Thanks @jnMetaCode. - Memory/LanceDB embeddings: forward configured `embedding.dimensions` into OpenAI embeddings requests so vector size and API output dimensions stay aligned when dimensions are explicitly configured. (#32036) Thanks @scotthuang. - Failover/error classification: treat HTTP `529` (provider overloaded, common with Anthropic-compatible APIs) as `rate_limit` so model failover can engage instead of misclassifying the error path. (#31854) Thanks @bugkill3r. - Plugin command/runtime hardening: validate and normalize plugin command name/description at registration boundaries, and guard Telegram native menu normalization paths so malformed plugin command specs cannot crash startup (`trim` on undefined). (#31997) Fixes #31944. Thanks @liuxiaopai-ai. 
diff --git a/src/discord/monitor/message-handler.preflight.test.ts b/src/discord/monitor/message-handler.preflight.test.ts index bef9350bddf..197b9509692 100644 --- a/src/discord/monitor/message-handler.preflight.test.ts +++ b/src/discord/monitor/message-handler.preflight.test.ts @@ -1,5 +1,11 @@ import { ChannelType } from "@buape/carbon"; -import { beforeEach, describe, expect, it } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +/* Created with vi.hoisted so the vi.mock factory below can reference it. */ const transcribeFirstAudioMock = vi.hoisted(() => vi.fn()); + +vi.mock("../../media-understanding/audio-preflight.js", () => ({ + transcribeFirstAudio: (...args: unknown[]) => transcribeFirstAudioMock(...args), +})); import { __testing as sessionBindingTesting, registerSessionBindingAdapter, @@ -74,6 +80,7 @@ describe("resolvePreflightMentionRequirement", () => { describe("preflightDiscordMessage", () => { beforeEach(() => { sessionBindingTesting.resetSessionBindingAdaptersForTests(); + transcribeFirstAudioMock.mockReset(); }); it("bypasses mention gating in bound threads for allowed bot senders", async () => { @@ -165,6 +172,101 @@ describe("preflightDiscordMessage", () => { expect(result?.boundSessionKey).toBe(threadBinding.targetSessionKey); expect(result?.shouldRequireMention).toBe(false); }); + + /* Scenario: guild text-channel message with empty content and a single audio/ogg attachment; the transcriber mock resolves to hey openclaw and the test expects wasMentioned to be true for the configured openclaw mention pattern. */ it("uses attachment content_type for guild audio preflight mention detection", async () => { + transcribeFirstAudioMock.mockResolvedValue("hey openclaw"); + + const channelId = "channel-audio-1"; + const client = { + fetchChannel: async (id: string) => { + if (id === channelId) { + return { + id: channelId, + type: ChannelType.GuildText, + name: "general", + }; + } + return null; + }, + } as unknown as import("@buape/carbon").Client; + + const message = { + id: "m-audio-1", + content: "", + timestamp: new Date().toISOString(), + channelId, + attachments: [ + { + id: "att-1", + url: "https://cdn.discordapp.com/attachments/voice.ogg", + content_type: "audio/ogg", + filename: "voice.ogg", + }, + ], 
mentionedUsers: [], + mentionedRoles: [], + mentionedEveryone: false, + author: { + id: "user-1", + bot: false, + username: "Alice", + }, + } as unknown as import("@buape/carbon").Message; + + const result = await preflightDiscordMessage({ + cfg: { + session: { + mainKey: "main", + scope: "per-sender", + }, + messages: { + groupChat: { + mentionPatterns: ["openclaw"], + }, + }, + } as import("../../config/config.js").OpenClawConfig, + discordConfig: {} as NonNullable< + import("../../config/config.js").OpenClawConfig["channels"] + >["discord"], + accountId: "default", + token: "token", + runtime: {} as import("../../runtime.js").RuntimeEnv, + botUserId: "openclaw-bot", + guildHistories: new Map(), + historyLimit: 0, + mediaMaxBytes: 1_000_000, + textLimit: 2_000, + replyToMode: "all", + dmEnabled: true, + groupDmEnabled: true, + ackReactionScope: "direct", + groupPolicy: "open", + threadBindings: createNoopThreadBindingManager("default"), + data: { + channel_id: channelId, + guild_id: "guild-1", + guild: { + id: "guild-1", + name: "Guild One", + }, + author: message.author, + message, + } as unknown as import("./listeners.js").DiscordMessageEvent, + client, + }); + + /* The transcriber must be called exactly once, receiving the attachment url and content_type as MediaUrls and MediaTypes on ctx. */ expect(transcribeFirstAudioMock).toHaveBeenCalledTimes(1); + expect(transcribeFirstAudioMock).toHaveBeenCalledWith( + expect.objectContaining({ + ctx: expect.objectContaining({ + MediaUrls: ["https://cdn.discordapp.com/attachments/voice.ogg"], + MediaTypes: ["audio/ogg"], + }), + }), + ); + expect(result).not.toBeNull(); + expect(result?.wasMentioned).toBe(true); + }); }); describe("shouldIgnoreBoundThreadWebhookMessage", () => { diff --git a/src/discord/monitor/message-handler.preflight.ts b/src/discord/monitor/message-handler.preflight.ts index 0916d2a15af..471d8b4c24e 100644 --- a/src/discord/monitor/message-handler.preflight.ts +++ b/src/discord/monitor/message-handler.preflight.ts @@ -504,11 +504,13 @@ export async function preflightDiscordMessage( const hasAudioAttachment = 
message.attachments?.some((att: { content_type?: string }) => att.content_type?.startsWith("audio/"), ); + /* True only when message.content contains non-whitespace characters. */ const hasTypedText = Boolean(message.content?.trim()); const needsPreflightTranscription = !isDirectMessage && shouldRequireMention && hasAudioAttachment && - !baseText && + // `baseText` includes media placeholders; gate on typed text only. + !hasTypedText && mentionRegexes.length > 0; if (needsPreflightTranscription) { @@ -541,10 +543,11 @@ export async function preflightDiscordMessage( } } + /* Without typed text, mention matching receives an empty string instead of baseText. */ const mentionText = hasTypedText ? baseText : ""; const wasMentioned = !isDirectMessage && matchesMentionWithExplicit({ - text: baseText, + text: mentionText, mentionRegexes, explicit: { hasAnyMention,