mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-26 16:06:16 +00:00
refactor(tts): move speech providers into plugins
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||
import { buildMicrosoftSpeechProvider } from "openclaw/plugin-sdk/speech";
|
||||
import { buildMicrosoftSpeechProvider } from "./speech-provider.js";
|
||||
|
||||
export default definePluginEntry({
|
||||
id: "microsoft",
|
||||
|
||||
@@ -4,6 +4,9 @@
|
||||
"private": true,
|
||||
"description": "OpenClaw Microsoft speech plugin",
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"node-edge-tts": "^1.2.10"
|
||||
},
|
||||
"openclaw": {
|
||||
"extensions": [
|
||||
"./index.ts"
|
||||
|
||||
43
extensions/microsoft/speech-provider.test.ts
Normal file
43
extensions/microsoft/speech-provider.test.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
import { listMicrosoftVoices } from "./speech-provider.js";
|
||||
|
||||
const fetchMock = vi.fn<typeof fetch>();
|
||||
|
||||
describe("listMicrosoftVoices", () => {
|
||||
afterEach(() => {
|
||||
fetchMock.mockReset();
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
it("maps Microsoft voices to the shared speech voice shape", async () => {
|
||||
fetchMock.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: async () => [
|
||||
{
|
||||
ShortName: "en-US-AvaMultilingualNeural",
|
||||
FriendlyName: "Microsoft Ava",
|
||||
Locale: "en-US",
|
||||
Gender: "Female",
|
||||
VoiceTag: {
|
||||
ContentCategories: ["General"],
|
||||
VoicePersonalities: ["Friendly", "Warm"],
|
||||
},
|
||||
},
|
||||
],
|
||||
} as Response);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
await expect(listMicrosoftVoices()).resolves.toEqual([
|
||||
{
|
||||
id: "en-US-AvaMultilingualNeural",
|
||||
name: "Microsoft Ava",
|
||||
category: "General",
|
||||
description: "Friendly, Warm",
|
||||
locale: "en-US",
|
||||
gender: "Female",
|
||||
personalities: ["Friendly", "Warm"],
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
130
extensions/microsoft/speech-provider.ts
Normal file
130
extensions/microsoft/speech-provider.ts
Normal file
@@ -0,0 +1,130 @@
|
||||
import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import {
|
||||
CHROMIUM_FULL_VERSION,
|
||||
TRUSTED_CLIENT_TOKEN,
|
||||
generateSecMsGecToken,
|
||||
} from "node-edge-tts/dist/drm.js";
|
||||
import type { SpeechProviderPlugin } from "openclaw/plugin-sdk/core";
|
||||
import {
|
||||
edgeTTS,
|
||||
inferEdgeExtension,
|
||||
isVoiceCompatibleAudio,
|
||||
resolvePreferredOpenClawTmpDir,
|
||||
type SpeechVoiceOption,
|
||||
} from "openclaw/plugin-sdk/speech-core";
|
||||
|
||||
// Edge TTS output format used as the retry fallback when synthesis with a
// custom format fails (see buildMicrosoftSpeechProvider below).
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";

// Shape of one entry in the Microsoft read-aloud voices list response.
// Every field is optional because the payload comes from an external API
// and is not validated against a schema before use.
type MicrosoftVoiceListEntry = {
  ShortName?: string;
  FriendlyName?: string;
  Locale?: string;
  Gender?: string;
  VoiceTag?: {
    ContentCategories?: string[];
    VoicePersonalities?: string[];
  };
};
function buildMicrosoftVoiceHeaders(): Record<string, string> {
|
||||
const major = CHROMIUM_FULL_VERSION.split(".")[0] || "0";
|
||||
return {
|
||||
Authority: "speech.platform.bing.com",
|
||||
Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
|
||||
Accept: "*/*",
|
||||
"User-Agent":
|
||||
`Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ` +
|
||||
`(KHTML, like Gecko) Chrome/${major}.0.0.0 Safari/537.36 Edg/${major}.0.0.0`,
|
||||
"Sec-MS-GEC": generateSecMsGecToken(),
|
||||
"Sec-MS-GEC-Version": `1-${CHROMIUM_FULL_VERSION}`,
|
||||
};
|
||||
}
|
||||
|
||||
function formatMicrosoftVoiceDescription(entry: MicrosoftVoiceListEntry): string | undefined {
|
||||
const personalities = entry.VoiceTag?.VoicePersonalities?.filter(Boolean) ?? [];
|
||||
return personalities.length > 0 ? personalities.join(", ") : undefined;
|
||||
}
|
||||
|
||||
export async function listMicrosoftVoices(): Promise<SpeechVoiceOption[]> {
|
||||
const response = await fetch(
|
||||
"https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" +
|
||||
`?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`,
|
||||
{
|
||||
headers: buildMicrosoftVoiceHeaders(),
|
||||
},
|
||||
);
|
||||
if (!response.ok) {
|
||||
throw new Error(`Microsoft voices API error (${response.status})`);
|
||||
}
|
||||
const voices = (await response.json()) as MicrosoftVoiceListEntry[];
|
||||
return Array.isArray(voices)
|
||||
? voices
|
||||
.map((voice) => ({
|
||||
id: voice.ShortName?.trim() ?? "",
|
||||
name: voice.FriendlyName?.trim() || voice.ShortName?.trim() || undefined,
|
||||
category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0),
|
||||
description: formatMicrosoftVoiceDescription(voice),
|
||||
locale: voice.Locale?.trim() || undefined,
|
||||
gender: voice.Gender?.trim() || undefined,
|
||||
personalities: voice.VoiceTag?.VoicePersonalities?.filter(
|
||||
(value): value is string => value.trim().length > 0,
|
||||
),
|
||||
}))
|
||||
.filter((voice) => voice.id.length > 0)
|
||||
: [];
|
||||
}
|
||||
|
||||
export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
|
||||
return {
|
||||
id: "microsoft",
|
||||
label: "Microsoft",
|
||||
aliases: ["edge"],
|
||||
listVoices: async () => await listMicrosoftVoices(),
|
||||
isConfigured: ({ config }) => config.edge.enabled,
|
||||
synthesize: async (req) => {
|
||||
const tempRoot = resolvePreferredOpenClawTmpDir();
|
||||
mkdirSync(tempRoot, { recursive: true, mode: 0o700 });
|
||||
const tempDir = mkdtempSync(path.join(tempRoot, "tts-microsoft-"));
|
||||
let outputFormat = req.overrides?.microsoft?.outputFormat ?? req.config.edge.outputFormat;
|
||||
const fallbackOutputFormat =
|
||||
outputFormat !== DEFAULT_EDGE_OUTPUT_FORMAT ? DEFAULT_EDGE_OUTPUT_FORMAT : undefined;
|
||||
|
||||
try {
|
||||
const runEdge = async (format: string) => {
|
||||
const fileExtension = inferEdgeExtension(format);
|
||||
const outputPath = path.join(tempDir, `speech${fileExtension}`);
|
||||
await edgeTTS({
|
||||
text: req.text,
|
||||
outputPath,
|
||||
config: {
|
||||
...req.config.edge,
|
||||
voice: req.overrides?.microsoft?.voice ?? req.config.edge.voice,
|
||||
outputFormat: format,
|
||||
},
|
||||
timeoutMs: req.config.timeoutMs,
|
||||
});
|
||||
const audioBuffer = readFileSync(outputPath);
|
||||
return {
|
||||
audioBuffer,
|
||||
outputFormat: format,
|
||||
fileExtension,
|
||||
voiceCompatible: isVoiceCompatibleAudio({ fileName: outputPath }),
|
||||
};
|
||||
};
|
||||
|
||||
try {
|
||||
return await runEdge(outputFormat);
|
||||
} catch (err) {
|
||||
if (!fallbackOutputFormat || fallbackOutputFormat === outputFormat) {
|
||||
throw err;
|
||||
}
|
||||
outputFormat = fallbackOutputFormat;
|
||||
return await runEdge(outputFormat);
|
||||
}
|
||||
} finally {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user