fix: harden discord audio preflight mention detection (#32136) (thanks @jnMetaCode)

This commit is contained in:
Peter Steinberger
2026-03-02 22:41:18 +00:00
parent b9b47f5002
commit 0b5d8e5b47
3 changed files with 109 additions and 3 deletions

View File

@@ -62,6 +62,7 @@ Docs: https://docs.openclaw.ai
- Gateway/Webchat NO_REPLY streaming: suppress assistant lead-fragment deltas that are prefixes of `NO_REPLY` and keep final-message buffering in sync, preventing partial `NO` leaks on silent-response runs while preserving legitimate short replies. (#32073) Thanks @liuxiaopai-ai.
- Tools/fsPolicy propagation: honor `tools.fs.workspaceOnly` for image/pdf local-root allowlists so non-sandbox media paths outside workspace are rejected when workspace-only mode is enabled. (#31882) Thanks @justinhuangcode.
- Daemon/Homebrew runtime pinning: resolve Homebrew Cellar Node paths to stable Homebrew-managed symlinks (including versioned formulas like `node@22`) so gateway installs keep the intended runtime across brew upgrades. (#32185) Thanks @scoootscooob.
- Discord/audio preflight mentions: detect audio attachments via Discord `content_type` and gate preflight transcription on typed text (not media placeholders), so guild voice-note mentions are transcribed and matched correctly. (#32136) Thanks @jnMetaCode.
- Memory/LanceDB embeddings: forward configured `embedding.dimensions` into OpenAI embeddings requests so vector size and API output dimensions stay aligned when dimensions are explicitly configured. (#32036) Thanks @scotthuang.
- Failover/error classification: treat HTTP `529` (provider overloaded, common with Anthropic-compatible APIs) as `rate_limit` so model failover can engage instead of misclassifying the error path. (#31854) Thanks @bugkill3r.
- Plugin command/runtime hardening: validate and normalize plugin command name/description at registration boundaries, and guard Telegram native menu normalization paths so malformed plugin command specs cannot crash startup (`trim` on undefined). (#31997) Fixes #31944. Thanks @liuxiaopai-ai.

View File

@@ -1,5 +1,11 @@
import { ChannelType } from "@buape/carbon";
import { beforeEach, describe, expect, it } from "vitest";
import { beforeEach, describe, expect, it, vi } from "vitest";
const transcribeFirstAudioMock = vi.hoisted(() => vi.fn());
vi.mock("../../media-understanding/audio-preflight.js", () => ({
transcribeFirstAudio: (...args: unknown[]) => transcribeFirstAudioMock(...args),
}));
import {
__testing as sessionBindingTesting,
registerSessionBindingAdapter,
@@ -74,6 +80,7 @@ describe("resolvePreflightMentionRequirement", () => {
describe("preflightDiscordMessage", () => {
beforeEach(() => {
sessionBindingTesting.resetSessionBindingAdaptersForTests();
transcribeFirstAudioMock.mockReset();
});
it("bypasses mention gating in bound threads for allowed bot senders", async () => {
@@ -165,6 +172,101 @@ describe("preflightDiscordMessage", () => {
expect(result?.boundSessionKey).toBe(threadBinding.targetSessionKey);
expect(result?.shouldRequireMention).toBe(false);
});
it("uses attachment content_type for guild audio preflight mention detection", async () => {
transcribeFirstAudioMock.mockResolvedValue("hey openclaw");
const channelId = "channel-audio-1";
const client = {
fetchChannel: async (id: string) => {
if (id === channelId) {
return {
id: channelId,
type: ChannelType.GuildText,
name: "general",
};
}
return null;
},
} as unknown as import("@buape/carbon").Client;
const message = {
id: "m-audio-1",
content: "",
timestamp: new Date().toISOString(),
channelId,
attachments: [
{
id: "att-1",
url: "https://cdn.discordapp.com/attachments/voice.ogg",
content_type: "audio/ogg",
filename: "voice.ogg",
},
],
mentionedUsers: [],
mentionedRoles: [],
mentionedEveryone: false,
author: {
id: "user-1",
bot: false,
username: "Alice",
},
} as unknown as import("@buape/carbon").Message;
const result = await preflightDiscordMessage({
cfg: {
session: {
mainKey: "main",
scope: "per-sender",
},
messages: {
groupChat: {
mentionPatterns: ["openclaw"],
},
},
} as import("../../config/config.js").OpenClawConfig,
discordConfig: {} as NonNullable<
import("../../config/config.js").OpenClawConfig["channels"]
>["discord"],
accountId: "default",
token: "token",
runtime: {} as import("../../runtime.js").RuntimeEnv,
botUserId: "openclaw-bot",
guildHistories: new Map(),
historyLimit: 0,
mediaMaxBytes: 1_000_000,
textLimit: 2_000,
replyToMode: "all",
dmEnabled: true,
groupDmEnabled: true,
ackReactionScope: "direct",
groupPolicy: "open",
threadBindings: createNoopThreadBindingManager("default"),
data: {
channel_id: channelId,
guild_id: "guild-1",
guild: {
id: "guild-1",
name: "Guild One",
},
author: message.author,
message,
} as unknown as import("./listeners.js").DiscordMessageEvent,
client,
});
expect(transcribeFirstAudioMock).toHaveBeenCalledTimes(1);
expect(transcribeFirstAudioMock).toHaveBeenCalledWith(
expect.objectContaining({
ctx: expect.objectContaining({
MediaUrls: ["https://cdn.discordapp.com/attachments/voice.ogg"],
MediaTypes: ["audio/ogg"],
}),
}),
);
expect(result).not.toBeNull();
expect(result?.wasMentioned).toBe(true);
});
});
describe("shouldIgnoreBoundThreadWebhookMessage", () => {

View File

@@ -504,11 +504,13 @@ export async function preflightDiscordMessage(
const hasAudioAttachment = message.attachments?.some((att: { content_type?: string }) =>
att.content_type?.startsWith("audio/"),
);
const hasTypedText = Boolean(message.content?.trim());
const needsPreflightTranscription =
!isDirectMessage &&
shouldRequireMention &&
hasAudioAttachment &&
!baseText &&
// `baseText` includes media placeholders; gate on typed text only.
!hasTypedText &&
mentionRegexes.length > 0;
if (needsPreflightTranscription) {
@@ -541,10 +543,11 @@ export async function preflightDiscordMessage(
}
}
const mentionText = hasTypedText ? baseText : "";
const wasMentioned =
!isDirectMessage &&
matchesMentionWithExplicit({
text: baseText,
text: mentionText,
mentionRegexes,
explicit: {
hasAnyMention,