feat(minimax): add native TTS speech provider (T2A v2)

Add MiniMax as a fourth TTS provider alongside OpenAI, ElevenLabs, and Microsoft. Register a SpeechProviderPlugin in the existing minimax extension with config resolution, directive parsing, and Talk Mode support. The hex-encoded audio returned by the T2A v2 API is decoded into an MP3 buffer. Closes #52720. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-24 07:01:49 +00:00 · 2026-03-27 16:24:56 +01:00
parent 49d962a82f
commit 7d7f5d85b4
6 changed files with 699 additions and 7 deletions
--- a/extensions/minimax/index.ts
+++ b/extensions/minimax/index.ts
@@ -26,6 +26,7 @@ import {
 import type { MiniMaxRegion } from "./oauth.js";
 import { applyMinimaxApiConfig, applyMinimaxApiConfigCn } from "./onboard.js";
 import { buildMinimaxPortalProvider, buildMinimaxProvider } from "./provider-catalog.js";
+import { buildMinimaxSpeechProvider } from "./speech-provider.js";

 const API_PROVIDER_ID = "minimax";
 const PORTAL_PROVIDER_ID = "minimax-portal";
@@ -303,5 +304,6 @@ export default definePluginEntry({
    });
    api.registerImageGenerationProvider(buildMinimaxImageGenerationProvider());
    api.registerImageGenerationProvider(buildMinimaxPortalImageGenerationProvider());
+    api.registerSpeechProvider(buildMinimaxSpeechProvider());
  },
 });
--- a/extensions/minimax/openclaw.plugin.json
+++ b/extensions/minimax/openclaw.plugin.json
@@ -61,6 +61,7 @@
    }
  ],
  "contracts": {
+    "speechProviders": ["minimax"],
    "mediaUnderstandingProviders": ["minimax", "minimax-portal"],
    "imageGenerationProviders": ["minimax", "minimax-portal"]
  },
--- a/extensions/minimax/speech-provider.test.ts
+++ b/extensions/minimax/speech-provider.test.ts
@@ -0,0 +1,318 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { buildMinimaxSpeechProvider } from "./speech-provider.js";
+
+describe("buildMinimaxSpeechProvider", () => {
+  const provider = buildMinimaxSpeechProvider();
+
+  // Static descriptor fields exposed on the provider object returned by buildMinimaxSpeechProvider().
+  describe("metadata", () => {
+    it("has correct id and label", () => {
+      expect(provider.id).toBe("minimax");
+      expect(provider.label).toBe("MiniMax");
+    });
+
+    it("has autoSelectOrder 40", () => {
+      expect(provider.autoSelectOrder).toBe(40);
+    });
+
+    // Only spot-checks one known entry of each list rather than pinning the full contents.
+    it("exposes models and voices", () => {
+      expect(provider.models).toContain("speech-2.8-hd");
+      expect(provider.voices).toContain("English_expressive_narrator");
+    });
+  });
+
+  // isConfigured() is true when an API key is available from provider config or MINIMAX_API_KEY.
+  describe("isConfigured", () => {
+    // Snapshot of the environment captured when the suite is collected.
+    const savedEnv = { ...process.env };
+
+    afterEach(() => {
+      // Restore in place instead of reassigning `process.env`, so the identity of the
+      // `process.env` object stays the same for any code that holds a reference to it.
+      for (const key of Object.keys(process.env)) {
+        if (!(key in savedEnv)) {
+          delete process.env[key];
+        }
+      }
+      Object.assign(process.env, savedEnv);
+    });
+
+    it("returns true when apiKey is in provider config", () => {
+      expect(
+        provider.isConfigured({ providerConfig: { apiKey: "sk-test" }, timeoutMs: 30000 }),
+      ).toBe(true);
+    });
+
+    it("returns false when no apiKey anywhere", () => {
+      delete process.env.MINIMAX_API_KEY;
+      expect(provider.isConfigured({ providerConfig: {}, timeoutMs: 30000 })).toBe(false);
+    });
+
+    it("returns true when MINIMAX_API_KEY env var is set", () => {
+      process.env.MINIMAX_API_KEY = "sk-env";
+      expect(provider.isConfigured({ providerConfig: {}, timeoutMs: 30000 })).toBe(true);
+    });
+  });
+
+  // resolveConfig() precedence under test: providers.minimax in rawConfig, then env vars, then defaults.
+  describe("resolveConfig", () => {
+    // Snapshot of the environment captured when the suite is collected.
+    const savedEnv = { ...process.env };
+
+    afterEach(() => {
+      // Restore in place instead of reassigning `process.env`, so the identity of the
+      // `process.env` object stays the same for any code that holds a reference to it.
+      for (const key of Object.keys(process.env)) {
+        if (!(key in savedEnv)) {
+          delete process.env[key];
+        }
+      }
+      Object.assign(process.env, savedEnv);
+    });
+
+    it("returns defaults when rawConfig is empty", () => {
+      // Clear the env fallbacks so only the built-in defaults can be observed.
+      delete process.env.MINIMAX_API_HOST;
+      delete process.env.MINIMAX_TTS_MODEL;
+      delete process.env.MINIMAX_TTS_VOICE_ID;
+      const config = provider.resolveConfig!({ rawConfig: {}, cfg: {} as never, timeoutMs: 30000 });
+      expect(config.baseUrl).toBe("https://api.minimaxi.com");
+      expect(config.model).toBe("speech-2.8-hd");
+      expect(config.voiceId).toBe("English_expressive_narrator");
+    });
+
+    it("reads from providers.minimax in rawConfig", () => {
+      const config = provider.resolveConfig!({
+        rawConfig: {
+          providers: {
+            minimax: {
+              baseUrl: "https://custom.api.com",
+              model: "speech-01-240228",
+              voiceId: "Chinese (Mandarin)_Warm_Girl",
+              speed: 1.5,
+              vol: 2.0,
+              pitch: 3,
+            },
+          },
+        },
+        cfg: {} as never,
+        timeoutMs: 30000,
+      });
+      expect(config.baseUrl).toBe("https://custom.api.com");
+      expect(config.model).toBe("speech-01-240228");
+      expect(config.voiceId).toBe("Chinese (Mandarin)_Warm_Girl");
+      expect(config.speed).toBe(1.5);
+      expect(config.vol).toBe(2.0);
+      expect(config.pitch).toBe(3);
+    });
+
+    it("reads from env vars as fallback", () => {
+      process.env.MINIMAX_API_HOST = "https://env.api.com";
+      process.env.MINIMAX_TTS_MODEL = "speech-01-240228";
+      process.env.MINIMAX_TTS_VOICE_ID = "Chinese (Mandarin)_Gentle_Boy";
+      const config = provider.resolveConfig!({ rawConfig: {}, cfg: {} as never, timeoutMs: 30000 });
+      expect(config.baseUrl).toBe("https://env.api.com");
+      expect(config.model).toBe("speech-01-240228");
+      expect(config.voiceId).toBe("Chinese (Mandarin)_Gentle_Boy");
+    });
+  });
+
+  // Covers parseDirectiveToken(): key aliases, numeric range validation, and policy gating.
+  describe("parseDirectiveToken", () => {
+    // Fully permissive policy; individual tests override single flags to check suppression.
+    const policy = {
+      enabled: true,
+      allowText: true,
+      allowProvider: true,
+      allowVoice: true,
+      allowModelId: true,
+      allowVoiceSettings: true,
+      allowNormalization: true,
+      allowSeed: true,
+    };
+
+    it("handles voice key", () => {
+      const result = provider.parseDirectiveToken!({
+        key: "voice",
+        value: "Chinese (Mandarin)_Warm_Girl",
+        policy,
+      });
+      expect(result.handled).toBe(true);
+      expect(result.overrides?.voiceId).toBe("Chinese (Mandarin)_Warm_Girl");
+    });
+
+    it("handles voiceid key", () => {
+      const result = provider.parseDirectiveToken!({ key: "voiceid", value: "test_voice", policy });
+      expect(result.handled).toBe(true);
+      expect(result.overrides?.voiceId).toBe("test_voice");
+    });
+
+    it("handles model key", () => {
+      const result = provider.parseDirectiveToken!({
+        key: "model",
+        value: "speech-01-240228",
+        policy,
+      });
+      expect(result.handled).toBe(true);
+      expect(result.overrides?.model).toBe("speech-01-240228");
+    });
+
+    it("handles speed key with valid value", () => {
+      const result = provider.parseDirectiveToken!({ key: "speed", value: "1.5", policy });
+      expect(result.handled).toBe(true);
+      expect(result.overrides?.speed).toBe(1.5);
+    });
+
+    // 5.0 is above the accepted upper bound of 2.0, so the token is consumed with a warning.
+    it("warns on invalid speed", () => {
+      const result = provider.parseDirectiveToken!({ key: "speed", value: "5.0", policy });
+      expect(result.handled).toBe(true);
+      expect(result.warnings).toHaveLength(1);
+      expect(result.overrides).toBeUndefined();
+    });
+
+    it("handles vol key", () => {
+      const result = provider.parseDirectiveToken!({ key: "vol", value: "3", policy });
+      expect(result.handled).toBe(true);
+      expect(result.overrides?.vol).toBe(3);
+    });
+
+    it("warns on vol=0 (exclusive minimum)", () => {
+      const result = provider.parseDirectiveToken!({ key: "vol", value: "0", policy });
+      expect(result.handled).toBe(true);
+      expect(result.warnings).toHaveLength(1);
+    });
+
+    it("handles volume alias", () => {
+      const result = provider.parseDirectiveToken!({ key: "volume", value: "5", policy });
+      expect(result.handled).toBe(true);
+      expect(result.overrides?.vol).toBe(5);
+    });
+
+    it("handles pitch key", () => {
+      const result = provider.parseDirectiveToken!({ key: "pitch", value: "-3", policy });
+      expect(result.handled).toBe(true);
+      expect(result.overrides?.pitch).toBe(-3);
+    });
+
+    // 20 is outside the accepted -12..12 pitch range.
+    it("warns on out-of-range pitch", () => {
+      const result = provider.parseDirectiveToken!({ key: "pitch", value: "20", policy });
+      expect(result.handled).toBe(true);
+      expect(result.warnings).toHaveLength(1);
+    });
+
+    it("returns handled=false for unknown keys", () => {
+      const result = provider.parseDirectiveToken!({
+        key: "unknown_key",
+        value: "whatever",
+        policy,
+      });
+      expect(result.handled).toBe(false);
+    });
+
+    // A disallowed directive is still reported as handled, but contributes no overrides.
+    it("suppresses voice when policy disallows it", () => {
+      const result = provider.parseDirectiveToken!({
+        key: "voice",
+        value: "test",
+        policy: { ...policy, allowVoice: false },
+      });
+      expect(result.handled).toBe(true);
+      expect(result.overrides).toBeUndefined();
+    });
+
+    it("suppresses model when policy disallows it", () => {
+      const result = provider.parseDirectiveToken!({
+        key: "model",
+        value: "test",
+        policy: { ...policy, allowModelId: false },
+      });
+      expect(result.handled).toBe(true);
+      expect(result.overrides).toBeUndefined();
+    });
+  });
+
+  // Exercises synthesize() against a stubbed global fetch; no network access is performed.
+  describe("synthesize", () => {
+    // Environment variables the provider reads. They are cleared before every test so the
+    // default model/voice/key assertions do not depend on the shell running the suite.
+    const envKeys = [
+      "MINIMAX_API_KEY",
+      "MINIMAX_API_HOST",
+      "MINIMAX_TTS_MODEL",
+      "MINIMAX_TTS_VOICE_ID",
+    ] as const;
+    const savedEnv = new Map<string, string | undefined>();
+
+    beforeEach(() => {
+      for (const key of envKeys) {
+        savedEnv.set(key, process.env[key]);
+        delete process.env[key];
+      }
+      vi.stubGlobal("fetch", vi.fn());
+    });
+
+    afterEach(() => {
+      for (const [key, value] of savedEnv) {
+        if (value === undefined) {
+          delete process.env[key];
+        } else {
+          process.env[key] = value;
+        }
+      }
+      savedEnv.clear();
+      // Undo vi.stubGlobal through Vitest rather than re-assigning globalThis.fetch by hand.
+      vi.unstubAllGlobals();
+      vi.restoreAllMocks();
+    });
+
+    it("makes correct API call and decodes hex response", async () => {
+      const hexAudio = Buffer.from("fake-audio-data").toString("hex");
+      const mockFetch = vi.mocked(globalThis.fetch);
+      mockFetch.mockResolvedValueOnce(
+        new Response(JSON.stringify({ data: { audio: hexAudio } }), {
+          status: 200,
+          headers: { "Content-Type": "application/json" },
+        }),
+      );
+
+      const result = await provider.synthesize({
+        text: "Hello world",
+        cfg: {} as never,
+        providerConfig: { apiKey: "sk-test", baseUrl: "https://api.minimaxi.com" },
+        target: "audio-file",
+        timeoutMs: 30000,
+      });
+
+      expect(result.outputFormat).toBe("mp3");
+      expect(result.fileExtension).toBe(".mp3");
+      expect(result.voiceCompatible).toBe(false);
+      expect(result.audioBuffer.toString()).toBe("fake-audio-data");
+
+      expect(mockFetch).toHaveBeenCalledOnce();
+      const [url, init] = mockFetch.mock.calls[0]!;
+      expect(url).toBe("https://api.minimaxi.com/v1/t2a_v2");
+      const body = JSON.parse(init!.body as string);
+      expect(body.model).toBe("speech-2.8-hd");
+      expect(body.text).toBe("Hello world");
+      expect(body.voice_setting.voice_id).toBe("English_expressive_narrator");
+    });
+
+    it("applies overrides", async () => {
+      const hexAudio = Buffer.from("audio").toString("hex");
+      const mockFetch = vi.mocked(globalThis.fetch);
+      mockFetch.mockResolvedValueOnce(
+        new Response(JSON.stringify({ data: { audio: hexAudio } }), { status: 200 }),
+      );
+
+      await provider.synthesize({
+        text: "Test",
+        cfg: {} as never,
+        providerConfig: { apiKey: "sk-test" },
+        providerOverrides: { model: "speech-01-240228", voiceId: "custom_voice", speed: 1.5 },
+        target: "audio-file",
+        timeoutMs: 30000,
+      });
+
+      const body = JSON.parse(mockFetch.mock.calls[0]![1]!.body as string);
+      expect(body.model).toBe("speech-01-240228");
+      expect(body.voice_setting.voice_id).toBe("custom_voice");
+      expect(body.voice_setting.speed).toBe(1.5);
+    });
+
+    it("throws when API key is missing", async () => {
+      // MINIMAX_API_KEY is already cleared by beforeEach, so neither key source is available.
+      await expect(
+        provider.synthesize({
+          text: "Test",
+          cfg: {} as never,
+          providerConfig: {},
+          target: "audio-file",
+          timeoutMs: 30000,
+        }),
+      ).rejects.toThrow("MiniMax API key missing");
+    });
+
+    it("throws on API error with response body", async () => {
+      vi.mocked(globalThis.fetch).mockResolvedValueOnce(
+        new Response("Unauthorized", { status: 401 }),
+      );
+      await expect(
+        provider.synthesize({
+          text: "Test",
+          cfg: {} as never,
+          providerConfig: { apiKey: "sk-test" },
+          target: "audio-file",
+          timeoutMs: 30000,
+        }),
+      ).rejects.toThrow("MiniMax TTS API error (401): Unauthorized");
+    });
+  });
+
+  // listVoices() is checked for a non-empty result whose first entry is the default English voice.
+  describe("listVoices", () => {
+    it("returns known voices", async () => {
+      const voices = await provider.listVoices!({} as never);
+      expect(voices.length).toBeGreaterThan(0);
+      expect(voices[0]!.id).toBe("English_expressive_narrator");
+    });
+  });
+});
--- a/extensions/minimax/speech-provider.ts
+++ b/extensions/minimax/speech-provider.ts
@@ -0,0 +1,245 @@
+import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
+import type {
+  SpeechDirectiveTokenParseContext,
+  SpeechProviderConfig,
+  SpeechProviderOverrides,
+  SpeechProviderPlugin,
+} from "openclaw/plugin-sdk/speech-core";
+import {
+  DEFAULT_MINIMAX_TTS_BASE_URL,
+  MINIMAX_TTS_MODELS,
+  MINIMAX_TTS_VOICES,
+  minimaxTTS,
+  normalizeMinimaxTtsBaseUrl,
+} from "./tts.js";
+
+/**
+ * Resolved MiniMax TTS settings. `baseUrl`, `model`, and `voiceId` are always
+ * populated (from config, environment, or built-in defaults); the API key and
+ * the numeric voice settings may be absent.
+ */
+type MinimaxTtsProviderConfig = {
+  apiKey?: string;
+  baseUrl: string;
+  model: string;
+  voiceId: string;
+  speed?: number;
+  vol?: number;
+  pitch?: number;
+};
+
+/** Per-request values that, when present, take precedence over the resolved provider config. */
+type MinimaxTtsProviderOverrides = {
+  model?: string;
+  voiceId?: string;
+  speed?: number;
+  vol?: number;
+  pitch?: number;
+};
+
+/** Returns `value` trimmed when it is a string with non-whitespace content; otherwise `undefined`. */
+function trimToUndefined(value: unknown): string | undefined {
+  if (typeof value !== "string") {
+    return undefined;
+  }
+  const trimmed = value.trim();
+  return trimmed.length > 0 ? trimmed : undefined;
+}
+
+/** Returns `value` when it is a finite number; `NaN`, infinities, and non-numbers yield `undefined`. */
+function asNumber(value: unknown): number | undefined {
+  if (typeof value !== "number") {
+    return undefined;
+  }
+  return Number.isFinite(value) ? value : undefined;
+}
+
+/** Returns `value` as a string-keyed record when it is a non-null, non-array object; otherwise `undefined`. */
+function asObject(value: unknown): Record<string, unknown> | undefined {
+  if (value === null || typeof value !== "object" || Array.isArray(value)) {
+    return undefined;
+  }
+  return value as Record<string, unknown>;
+}
+
+/**
+ * Builds the resolved MiniMax TTS config from a raw TTS config object.
+ *
+ * The MiniMax section is read from `rawConfig.providers.minimax`, falling back to
+ * `rawConfig.minimax`. String settings fall back to the `MINIMAX_API_HOST`,
+ * `MINIMAX_TTS_MODEL`, and `MINIMAX_TTS_VOICE_ID` environment variables, then to
+ * built-in defaults. Numeric settings are kept only when they are finite numbers.
+ */
+function normalizeMinimaxProviderConfig(
+  rawConfig: Record<string, unknown>,
+): MinimaxTtsProviderConfig {
+  const section = asObject(asObject(rawConfig.providers)?.minimax) ?? asObject(rawConfig.minimax);
+
+  const apiKey = normalizeResolvedSecretInputString({
+    value: section?.apiKey,
+    path: "messages.tts.providers.minimax.apiKey",
+  });
+
+  const configuredBaseUrl =
+    trimToUndefined(section?.baseUrl) ?? trimToUndefined(process.env.MINIMAX_API_HOST);
+  const baseUrl = normalizeMinimaxTtsBaseUrl(configuredBaseUrl ?? DEFAULT_MINIMAX_TTS_BASE_URL);
+
+  const configuredModel =
+    trimToUndefined(section?.model) ?? trimToUndefined(process.env.MINIMAX_TTS_MODEL);
+  const configuredVoiceId =
+    trimToUndefined(section?.voiceId) ?? trimToUndefined(process.env.MINIMAX_TTS_VOICE_ID);
+
+  return {
+    apiKey,
+    baseUrl,
+    model: configuredModel ?? "speech-2.8-hd",
+    voiceId: configuredVoiceId ?? "English_expressive_narrator",
+    speed: asNumber(section?.speed),
+    vol: asNumber(section?.vol),
+    pitch: asNumber(section?.pitch),
+  };
+}
+
+/**
+ * Reads a provider config object into a fully populated MiniMax config.
+ *
+ * Each field is taken from `config` when it holds a usable value; otherwise it
+ * falls back to the environment/default values produced by
+ * `normalizeMinimaxProviderConfig({})`.
+ */
+function readMinimaxProviderConfig(config: SpeechProviderConfig): MinimaxTtsProviderConfig {
+  const fallback = normalizeMinimaxProviderConfig({});
+  const configuredBaseUrl = trimToUndefined(config.baseUrl);
+  return {
+    apiKey: trimToUndefined(config.apiKey) ?? fallback.apiKey,
+    // Normalize here as well, so a configured URL with a trailing slash cannot
+    // produce a double slash when the endpoint path is appended.
+    baseUrl:
+      configuredBaseUrl === undefined
+        ? fallback.baseUrl
+        : normalizeMinimaxTtsBaseUrl(configuredBaseUrl),
+    model: trimToUndefined(config.model) ?? fallback.model,
+    voiceId: trimToUndefined(config.voiceId) ?? fallback.voiceId,
+    speed: asNumber(config.speed) ?? fallback.speed,
+    vol: asNumber(config.vol) ?? fallback.vol,
+    pitch: asNumber(config.pitch) ?? fallback.pitch,
+  };
+}
+
+/**
+ * Extracts the MiniMax-relevant fields from per-request overrides.
+ * Returns an empty object when no overrides were supplied; otherwise each field
+ * is kept only if it is a non-blank string (model, voiceId) or a finite number.
+ */
+function readMinimaxOverrides(
+  overrides: SpeechProviderOverrides | undefined,
+): MinimaxTtsProviderOverrides {
+  if (!overrides) {
+    return {};
+  }
+  const model = trimToUndefined(overrides.model);
+  const voiceId = trimToUndefined(overrides.voiceId);
+  const speed = asNumber(overrides.speed);
+  const vol = asNumber(overrides.vol);
+  const pitch = asNumber(overrides.pitch);
+  return { model, voiceId, speed, vol, pitch };
+}
+
+/**
+ * Parses a directive value as a finite number.
+ * Blank input is rejected explicitly because `Number("")` evaluates to 0, which
+ * would otherwise be accepted as a valid value for settings whose range includes 0.
+ */
+function parseDirectiveNumber(raw: string): number | undefined {
+  if (raw.trim() === "") {
+    return undefined;
+  }
+  const parsed = Number(raw);
+  return Number.isFinite(parsed) ? parsed : undefined;
+}
+
+/**
+ * Interprets one `key=value` speech directive for the MiniMax provider.
+ *
+ * Recognized keys: voice aliases, model aliases, `speed` (0.5 to 2.0),
+ * `vol`/`volume` (greater than 0, up to 10), and `pitch` (-12 to 12).
+ *
+ * @returns `handled: false` for unrecognized keys. For recognized keys,
+ *   `handled: true` with `overrides` when the value is accepted, with `warnings`
+ *   when a numeric value is invalid, or with neither when the policy disallows
+ *   that kind of directive.
+ */
+function parseDirectiveToken(ctx: SpeechDirectiveTokenParseContext): {
+  handled: boolean;
+  overrides?: SpeechProviderOverrides;
+  warnings?: string[];
+} {
+  switch (ctx.key) {
+    case "voice":
+    case "voiceid":
+    case "voice_id":
+    case "minimax_voice":
+    case "minimaxvoice":
+      if (!ctx.policy.allowVoice) {
+        return { handled: true };
+      }
+      return { handled: true, overrides: { voiceId: ctx.value } };
+    case "model":
+    case "minimax_model":
+    case "minimaxmodel":
+      if (!ctx.policy.allowModelId) {
+        return { handled: true };
+      }
+      return { handled: true, overrides: { model: ctx.value } };
+    case "speed": {
+      if (!ctx.policy.allowVoiceSettings) {
+        return { handled: true };
+      }
+      const speed = parseDirectiveNumber(ctx.value);
+      if (speed === undefined || speed < 0.5 || speed > 2.0) {
+        return { handled: true, warnings: [`invalid MiniMax speed "${ctx.value}" (0.5-2.0)`] };
+      }
+      return { handled: true, overrides: { speed } };
+    }
+    case "vol":
+    case "volume": {
+      if (!ctx.policy.allowVoiceSettings) {
+        return { handled: true };
+      }
+      const vol = parseDirectiveNumber(ctx.value);
+      // The lower bound is exclusive (0 is rejected) while the upper bound is inclusive.
+      if (vol === undefined || vol <= 0 || vol > 10) {
+        return {
+          handled: true,
+          warnings: [`invalid MiniMax volume "${ctx.value}" (must be > 0 and <= 10)`],
+        };
+      }
+      return { handled: true, overrides: { vol } };
+    }
+    case "pitch": {
+      if (!ctx.policy.allowVoiceSettings) {
+        return { handled: true };
+      }
+      const pitch = parseDirectiveNumber(ctx.value);
+      if (pitch === undefined || pitch < -12 || pitch > 12) {
+        return { handled: true, warnings: [`invalid MiniMax pitch "${ctx.value}" (-12 to 12)`] };
+      }
+      return { handled: true, overrides: { pitch } };
+    }
+    default:
+      return { handled: false };
+  }
+}
+
+/**
+ * Builds the MiniMax speech provider plugin object.
+ *
+ * The returned object carries static metadata (id, label, model and voice lists)
+ * plus the hooks for config resolution, directive parsing, Talk config/override
+ * resolution, voice listing, configuration detection, and synthesis.
+ */
+export function buildMinimaxSpeechProvider(): SpeechProviderPlugin {
+  return {
+    id: "minimax",
+    label: "MiniMax",
+    autoSelectOrder: 40,
+    models: MINIMAX_TTS_MODELS,
+    voices: MINIMAX_TTS_VOICES,
+    resolveConfig: ({ rawConfig }) => normalizeMinimaxProviderConfig(rawConfig),
+    parseDirectiveToken,
+    // Starts from the normalized base TTS config and overlays each Talk provider field
+    // only when that field is present and usable, so absent fields keep the base value.
+    resolveTalkConfig: ({ baseTtsConfig, talkProviderConfig }) => {
+      const base = normalizeMinimaxProviderConfig(baseTtsConfig);
+      return {
+        ...base,
+        // The API key is overlaid whenever the Talk config defines the property at all.
+        ...(talkProviderConfig.apiKey === undefined
+          ? {}
+          : {
+              apiKey: normalizeResolvedSecretInputString({
+                value: talkProviderConfig.apiKey,
+                path: "talk.providers.minimax.apiKey",
+              }),
+            }),
+        ...(trimToUndefined(talkProviderConfig.baseUrl) == null
+          ? {}
+          : { baseUrl: normalizeMinimaxTtsBaseUrl(trimToUndefined(talkProviderConfig.baseUrl)) }),
+        // Talk config names the model `modelId`; it maps onto this provider's `model` field.
+        ...(trimToUndefined(talkProviderConfig.modelId) == null
+          ? {}
+          : { model: trimToUndefined(talkProviderConfig.modelId) }),
+        ...(trimToUndefined(talkProviderConfig.voiceId) == null
+          ? {}
+          : { voiceId: trimToUndefined(talkProviderConfig.voiceId) }),
+        ...(asNumber(talkProviderConfig.speed) == null
+          ? {}
+          : { speed: asNumber(talkProviderConfig.speed) }),
+        ...(asNumber(talkProviderConfig.vol) == null
+          ? {}
+          : { vol: asNumber(talkProviderConfig.vol) }),
+        ...(asNumber(talkProviderConfig.pitch) == null
+          ? {}
+          : { pitch: asNumber(talkProviderConfig.pitch) }),
+      };
+    },
+    // NOTE(review): only voiceId, modelId, and speed are forwarded from Talk params;
+    // vol and pitch are not — confirm this is intentional.
+    resolveTalkOverrides: ({ params }) => ({
+      ...(trimToUndefined(params.voiceId) == null
+        ? {}
+        : { voiceId: trimToUndefined(params.voiceId) }),
+      ...(trimToUndefined(params.modelId) == null
+        ? {}
+        : { model: trimToUndefined(params.modelId) }),
+      ...(asNumber(params.speed) == null ? {} : { speed: asNumber(params.speed) }),
+    }),
+    // The voice list is the static MINIMAX_TTS_VOICES array; each id doubles as its display name.
+    listVoices: async () => MINIMAX_TTS_VOICES.map((voice) => ({ id: voice, name: voice })),
+    // Configured when an API key is available from provider config or MINIMAX_API_KEY.
+    isConfigured: ({ providerConfig }) =>
+      Boolean(readMinimaxProviderConfig(providerConfig).apiKey || process.env.MINIMAX_API_KEY),
+    synthesize: async (req) => {
+      const config = readMinimaxProviderConfig(req.providerConfig);
+      const overrides = readMinimaxOverrides(req.providerOverrides);
+      // The config key wins; the environment variable is the fallback.
+      const apiKey = config.apiKey || process.env.MINIMAX_API_KEY;
+      if (!apiKey) {
+        throw new Error("MiniMax API key missing");
+      }
+      // Per-request overrides take precedence over the resolved provider config.
+      const audioBuffer = await minimaxTTS({
+        text: req.text,
+        apiKey,
+        baseUrl: config.baseUrl,
+        model: overrides.model ?? config.model,
+        voiceId: overrides.voiceId ?? config.voiceId,
+        speed: overrides.speed ?? config.speed,
+        vol: overrides.vol ?? config.vol,
+        pitch: overrides.pitch ?? config.pitch,
+        timeoutMs: req.timeoutMs,
+      });
+      return {
+        audioBuffer,
+        outputFormat: "mp3",
+        fileExtension: ".mp3",
+        voiceCompatible: false,
+      };
+    },
+  };
+}
--- a/extensions/minimax/tts.ts
+++ b/extensions/minimax/tts.ts
@@ -0,0 +1,90 @@
+/** Base URL returned by `normalizeMinimaxTtsBaseUrl` when no base URL is supplied. */
+export const DEFAULT_MINIMAX_TTS_BASE_URL = "https://api.minimaxi.com";
+
+/** Model identifiers exported for the speech provider's model list. */
+export const MINIMAX_TTS_MODELS = ["speech-2.8-hd", "speech-01-240228"] as const;
+
+/** Voice identifiers exported for the speech provider's voice list. */
+export const MINIMAX_TTS_VOICES = [
+  "English_expressive_narrator",
+  "Chinese (Mandarin)_Warm_Girl",
+  "Chinese (Mandarin)_Lively_Girl",
+  "Chinese (Mandarin)_Gentle_Boy",
+  "Chinese (Mandarin)_Steady_Boy",
+] as const;
+
+/**
+ * Normalizes a MiniMax base URL: surrounding whitespace and trailing slashes are
+ * removed. A missing or blank value yields `DEFAULT_MINIMAX_TTS_BASE_URL`.
+ */
+export function normalizeMinimaxTtsBaseUrl(baseUrl?: string): string {
+  const candidate = (baseUrl ?? "").trim();
+  return candidate === "" ? DEFAULT_MINIMAX_TTS_BASE_URL : candidate.replace(/\/+$/, "");
+}
+
+/**
+ * Calls the MiniMax T2A v2 endpoint (`POST {baseUrl}/v1/t2a_v2`) and returns the
+ * synthesized audio as a buffer decoded from the hex string in the response.
+ *
+ * Optional settings default to: speed 1.0, vol 1.0, pitch 0, format "mp3",
+ * sampleRate 32000. The request is aborted after `timeoutMs` milliseconds.
+ *
+ * @throws Error when the HTTP status is not OK, when the response body reports a
+ *   non-zero `base_resp.status_code`, or when no decodable audio is returned.
+ */
+export async function minimaxTTS(params: {
+  text: string;
+  apiKey: string;
+  baseUrl: string;
+  model: string;
+  voiceId: string;
+  speed?: number;
+  vol?: number;
+  pitch?: number;
+  format?: string;
+  sampleRate?: number;
+  timeoutMs: number;
+}): Promise<Buffer> {
+  const {
+    text,
+    apiKey,
+    baseUrl,
+    model,
+    voiceId,
+    speed = 1.0,
+    vol = 1.0,
+    pitch = 0,
+    format = "mp3",
+    sampleRate = 32000,
+    timeoutMs,
+  } = params;
+
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), timeoutMs);
+
+  try {
+    const response = await fetch(`${baseUrl}/v1/t2a_v2`, {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model,
+        text,
+        voice_setting: {
+          voice_id: voiceId,
+          speed,
+          vol,
+          pitch,
+        },
+        audio_setting: {
+          format,
+          sample_rate: sampleRate,
+        },
+      }),
+      signal: controller.signal,
+    });
+
+    if (!response.ok) {
+      const errBody = await response.text().catch(() => "");
+      throw new Error(`MiniMax TTS API error (${response.status})${errBody ? `: ${errBody}` : ""}`);
+    }
+
+    const body = (await response.json()) as {
+      data?: { audio?: unknown } | null;
+      base_resp?: { status_code?: unknown; status_msg?: unknown } | null;
+    } | null;
+
+    // A successful HTTP status can still carry an application-level failure in
+    // `base_resp`; surface its code and message instead of a generic "no audio" error.
+    const statusCode = body?.base_resp?.status_code;
+    if (typeof statusCode === "number" && statusCode !== 0) {
+      const statusMsg = body?.base_resp?.status_msg;
+      const detail = typeof statusMsg === "string" && statusMsg ? `: ${statusMsg}` : "";
+      throw new Error(`MiniMax TTS API error (${statusCode})${detail}`);
+    }
+
+    const hexAudio = body?.data?.audio;
+    if (typeof hexAudio !== "string" || hexAudio.length === 0) {
+      throw new Error("MiniMax TTS API returned no audio data");
+    }
+
+    // Hex decoding stops at the first invalid character, so a malformed payload can
+    // decode to an empty buffer; treat that as missing audio rather than returning it.
+    const audio = Buffer.from(hexAudio, "hex");
+    if (audio.length === 0) {
+      throw new Error("MiniMax TTS API returned no audio data");
+    }
+    return audio;
+  } finally {
+    clearTimeout(timeout);
+  }
+}