From 76d6514ff56f62654589fc87ab0356a8726bd634 Mon Sep 17 00:00:00 2001
From: openjay
Date: Mon, 9 Feb 2026 22:51:19 +0800
Subject: [PATCH] fix: add "audio" to openai provider capabilities

The openai provider implements transcribeAudio via
transcribeOpenAiCompatibleAudio (Whisper API), but its capabilities
array only declared ["image"]. This caused the media-understanding
runner to skip the openai provider when processing inbound audio
messages, resulting in raw audio files being passed to agents instead
of transcribed text.

Fix: Add "audio" to the capabilities array so the runner correctly
selects the openai provider for audio transcription.

Co-authored-by: Cursor
---
 src/media-understanding/providers/openai/index.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/media-understanding/providers/openai/index.ts b/src/media-understanding/providers/openai/index.ts
index d6e735c18ef..24d01964562 100644
--- a/src/media-understanding/providers/openai/index.ts
+++ b/src/media-understanding/providers/openai/index.ts
@@ -4,7 +4,7 @@ import { transcribeOpenAiCompatibleAudio } from "./audio.js";
 
 export const openaiProvider: MediaUnderstandingProvider = {
   id: "openai",
-  capabilities: ["image"],
+  capabilities: ["image", "audio"],
   describeImage: describeImageWithModel,
   transcribeAudio: transcribeOpenAiCompatibleAudio,
 };