From 76d6514ff56f62654589fc87ab0356a8726bd634 Mon Sep 17 00:00:00 2001
From: openjay <snowzhangxuegang@gmail.com>
Date: Mon, 9 Feb 2026 22:51:19 +0800
Subject: [PATCH] fix: add "audio" to openai provider capabilities

The openai provider implements transcribeAudio via
transcribeOpenAiCompatibleAudio (Whisper API), but its capabilities
array declared only ["image"]. As a result, the media-understanding
runner skipped the openai provider when processing inbound audio
messages, and raw audio files were passed to agents instead of
transcribed text.

Fix: Add "audio" to the capabilities array so the runner correctly
selects the openai provider for audio transcription.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 src/media-understanding/providers/openai/index.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/media-understanding/providers/openai/index.ts b/src/media-understanding/providers/openai/index.ts
index d6e735c18ef..24d01964562 100644
--- a/src/media-understanding/providers/openai/index.ts
+++ b/src/media-understanding/providers/openai/index.ts
@@ -4,7 +4,7 @@ import { transcribeOpenAiCompatibleAudio } from "./audio.js";
 
 export const openaiProvider: MediaUnderstandingProvider = {
   id: "openai",
-  capabilities: ["image"],
+  capabilities: ["image", "audio"],
   describeImage: describeImageWithModel,
   transcribeAudio: transcribeOpenAiCompatibleAudio,
 };