fix: add "audio" to openai provider capabilities

The openai provider implements transcribeAudio via
transcribeOpenAiCompatibleAudio (Whisper API), but its capabilities
array only declared ["image"]. This caused the media-understanding
runner to skip the openai provider when processing inbound audio
messages, resulting in raw audio files being passed to agents
instead of transcribed text.

Fix: Add "audio" to the capabilities array so the runner correctly
selects the openai provider for audio transcription.

Co-authored-by: Cursor <cursoragent@cursor.com>
Author: openjay
Date: 2026-02-09 22:51:19 +08:00
Committed-by: Peter Steinberger
Commit: 76d6514ff5 (parent 6a425d189e)

@@ -4,7 +4,7 @@ import { transcribeOpenAiCompatibleAudio } from "./audio.js";
 export const openaiProvider: MediaUnderstandingProvider = {
   id: "openai",
-  capabilities: ["image"],
+  capabilities: ["image", "audio"],
   describeImage: describeImageWithModel,
   transcribeAudio: transcribeOpenAiCompatibleAudio,
 };
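
The selection behavior the commit message describes can be sketched as follows. This is a hypothetical simplification, not the actual runner code: the `MediaCapability` type, the `selectProvider` helper, and the stripped-down `MediaUnderstandingProvider` shape are assumptions made for illustration, capturing only that the runner matches on the declared `capabilities` array rather than on which methods a provider implements.

```typescript
// Hypothetical sketch of capability-based provider selection.
type MediaCapability = "image" | "audio";

interface MediaUnderstandingProvider {
  id: string;
  capabilities: MediaCapability[];
}

function selectProvider(
  providers: MediaUnderstandingProvider[],
  needed: MediaCapability,
): MediaUnderstandingProvider | undefined {
  // A provider is eligible only if it declares the capability;
  // implementing transcribeAudio alone is not enough.
  return providers.find((p) => p.capabilities.includes(needed));
}

// Before the fix: only ["image"] declared, so audio lookups miss the provider.
const before = selectProvider(
  [{ id: "openai", capabilities: ["image"] }],
  "audio",
);

// After the fix: ["image", "audio"] makes the provider eligible for audio.
const after = selectProvider(
  [{ id: "openai", capabilities: ["image", "audio"] }],
  "audio",
);

console.log(before?.id, after?.id); // undefined "openai"
```

With the capability missing, `selectProvider` returns `undefined` and the runner falls through, which is why inbound audio reached agents untranscribed.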