TTS: add provider personas

This commit is contained in:
Barron Roth
2026-04-23 07:26:32 -07:00
committed by Ayaan Zaidi
parent 80219ed1b3
commit 0594fa3c4d
39 changed files with 2021 additions and 136 deletions

View File

@@ -9,16 +9,19 @@ const ttsMocks = vi.hoisted(() => ({
getResolvedSpeechProviderConfig: vi.fn(),
getLastTtsAttempt: vi.fn(),
getTtsMaxLength: vi.fn(),
getTtsPersona: vi.fn(),
getTtsProvider: vi.fn(),
isSummarizationEnabled: vi.fn(),
isTtsEnabled: vi.fn(),
isTtsProviderConfigured: vi.fn(),
listTtsPersonas: vi.fn(),
resolveTtsConfig: vi.fn(),
resolveTtsPrefsPath: vi.fn(),
setLastTtsAttempt: vi.fn(),
setSummarizationEnabled: vi.fn(),
setTtsEnabled: vi.fn(),
setTtsMaxLength: vi.fn(),
setTtsPersona: vi.fn(),
setTtsProvider: vi.fn(),
textToSpeech: vi.fn(),
}));
@@ -66,10 +69,12 @@ describe("handleTtsCommands status fallback reporting", () => {
ttsMocks.resolveTtsPrefsPath.mockReturnValue("/tmp/tts-prefs.json");
ttsMocks.isTtsEnabled.mockReturnValue(true);
ttsMocks.getTtsProvider.mockReturnValue(PRIMARY_TTS_PROVIDER);
ttsMocks.getTtsPersona.mockReturnValue(undefined);
ttsMocks.isTtsProviderConfigured.mockReturnValue(true);
ttsMocks.getTtsMaxLength.mockReturnValue(1500);
ttsMocks.isSummarizationEnabled.mockReturnValue(true);
ttsMocks.getLastTtsAttempt.mockReturnValue(undefined);
ttsMocks.listTtsPersonas.mockReturnValue([]);
});
it("shows fallback provider details for successful attempts", async () => {
@@ -234,6 +239,24 @@ describe("handleTtsCommands status fallback reporting", () => {
);
});
// Covers both halves of `/tts persona`: listing configured personas and selecting one by id.
it("lists and sets configured TTS personas", async () => {
ttsMocks.listTtsPersonas.mockReturnValue([
{
id: "alfred",
label: "Alfred",
provider: "google",
},
]);
// No argument: the reply contains one `id (label) provider=<provider>` line per persona.
const listResult = await handleTtsCommands(buildTtsParams("/tts persona"), true);
expect(listResult?.shouldContinue).toBe(false);
expect(listResult?.reply?.text).toContain("alfred (Alfred) provider=google");
// With an id: the matched persona id is handed to setTtsPersona together with the prefs path.
const setResult = await handleTtsCommands(buildTtsParams("/tts persona alfred"), true);
expect(setResult?.shouldContinue).toBe(false);
expect(ttsMocks.setTtsPersona).toHaveBeenCalledWith("/tmp/tts-prefs.json", "alfred");
});
it("reads the latest assistant transcript reply once", async () => {
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-tts-latest-"));
const sessionFile = path.join(tempDir, "session.jsonl");

View File

@@ -14,16 +14,19 @@ import {
getResolvedSpeechProviderConfig,
getLastTtsAttempt,
getTtsMaxLength,
getTtsPersona,
getTtsProvider,
isSummarizationEnabled,
isTtsEnabled,
isTtsProviderConfigured,
listTtsPersonas,
resolveTtsConfig,
resolveTtsPrefsPath,
setLastTtsAttempt,
setSummarizationEnabled,
setTtsEnabled,
setTtsMaxLength,
setTtsPersona,
setTtsProvider,
textToSpeech,
} from "../../tts/tts.js";
@@ -68,7 +71,11 @@ function formatAttemptDetails(attempts: TtsAttemptDetail[] | undefined): string
.map((attempt) => {
const reason = attempt.reasonCode === "success" ? "ok" : attempt.reasonCode;
const latency = Number.isFinite(attempt.latencyMs) ? ` ${attempt.latencyMs}ms` : "";
return `${attempt.provider}:${attempt.outcome}(${reason})${latency}`;
const persona =
attempt.persona && attempt.personaBinding && attempt.personaBinding !== "none"
? ` persona=${attempt.persona}:${attempt.personaBinding}`
: "";
return `${attempt.provider}:${attempt.outcome}(${reason})${persona}${latency}`;
})
.join(", ");
}
@@ -83,6 +90,7 @@ function ttsUsage(): ReplyPayload {
`• /tts off — Disable TTS\n` +
`• /tts status — Show current settings\n` +
`• /tts provider [name] — View/change provider\n` +
`• /tts persona [id|off] — View/change persona\n` +
`• /tts limit [number] — View/change text limit\n` +
`• /tts summary [on|off] — View/change auto-summary\n` +
`• /tts audio <text> — Generate audio from text\n` +
@@ -96,6 +104,7 @@ function ttsUsage(): ReplyPayload {
`• Summary OFF: Truncates text, then generates audio\n\n` +
`**Examples:**\n` +
`/tts provider <id>\n` +
`/tts persona <id>\n` +
`/tts limit 2000\n` +
`/tts latest\n` +
`/tts audio Hello, this is a test!`,
@@ -129,6 +138,7 @@ async function buildTtsAudioReply(params: {
textLength: params.text.length,
summarized: false,
provider: result.provider,
persona: result.persona,
fallbackFrom: result.fallbackFrom,
attemptedProviders: result.attemptedProviders,
attempts: result.attempts,
@@ -150,6 +160,7 @@ async function buildTtsAudioReply(params: {
success: false,
textLength: params.text.length,
summarized: false,
persona: result.persona,
attemptedProviders: result.attemptedProviders,
attempts: result.attempts,
error: result.error,
@@ -349,6 +360,50 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
};
}
if (action === "persona") {
  // Resolve the configured personas and the active one before branching.
  const configured = listTtsPersonas(config);
  const current = getTtsPersona(config, prefsPath);

  // Bare `/tts persona`: show the active persona followed by every configured one.
  if (args.trim().length === 0) {
    let listing = "No personas configured.";
    if (configured.length > 0) {
      const rows = configured.map((entry) => {
        const labelPart = entry.label ? ` (${entry.label})` : "";
        const providerPart = entry.provider ? ` provider=${entry.provider}` : "";
        return `${entry.id}${labelPart}${providerPart}`;
      });
      listing = rows.join("\n");
    }
    const text = [
      "🎭 TTS persona",
      `Active: ${current?.id ?? "none"}`,
      listing,
      "Usage: /tts persona <id> | off",
    ].join("\n");
    return { shouldContinue: false, reply: { text } };
  }

  const requested = normalizeOptionalLowercaseString(args) ?? "";

  // "off", "none" and "default" all reset the persona by passing null to setTtsPersona.
  if (["off", "none", "default"].includes(requested)) {
    setTtsPersona(prefsPath, null);
    return { shouldContinue: false, reply: { text: "✅ TTS persona disabled." } };
  }

  const match = configured.find((entry) => entry.id === requested);
  if (match) {
    setTtsPersona(prefsPath, match.id);
    return {
      shouldContinue: false,
      reply: { text: `✅ TTS persona set to ${match.id}.` },
    };
  }

  // Echo the normalized id, or the raw argument text when normalization produced nothing.
  const unknownText =
    `❌ Unknown TTS persona: ${requested || args}.\n` +
    `Use /tts persona to list configured personas.`;
  return { shouldContinue: false, reply: { text: unknownText } };
}
if (action === "limit") {
if (!args.trim()) {
const currentLimit = getTtsMaxLength(prefsPath);
@@ -410,6 +465,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
if (action === "status") {
const enabled = isTtsEnabled(config, prefsPath);
const provider = getTtsProvider(config, prefsPath);
const persona = getTtsPersona(config, prefsPath);
const hasKey = isTtsProviderConfigured(config, provider, params.cfg);
const maxLength = getTtsMaxLength(prefsPath);
const summarize = isSummarizationEnabled(prefsPath);
@@ -419,6 +475,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
`State: ${enabled ? "✅ enabled" : "❌ disabled"}`,
`Chat override: ${params.sessionEntry?.ttsAuto ?? "default"}`,
`Provider: ${provider} (${hasKey ? "✅ configured" : "❌ not configured"})`,
`Persona: ${persona?.id ?? "none"}`,
`Text limit: ${maxLength} chars`,
`Auto-summary: ${summarize ? "on" : "off"}`,
];
@@ -429,6 +486,9 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
lines.push(`Text: ${last.textLength} chars${last.summarized ? " (summarized)" : ""}`);
if (last.success) {
lines.push(`Provider: ${last.provider ?? "unknown"}`);
if (last.persona) {
lines.push(`Persona: ${last.persona}`);
}
if (last.fallbackFrom && last.provider && last.fallbackFrom !== last.provider) {
lines.push(`Fallback: ${last.fallbackFrom} -> ${last.provider}`);
}

View File

@@ -73,6 +73,7 @@ const mocks = vi.hoisted(() => ({
attempts: [],
})),
setTtsProvider: vi.fn(),
setTtsPersona: vi.fn(),
resolveExplicitTtsOverrides: vi.fn(
({
provider,
@@ -220,11 +221,14 @@ vi.mock("../video-generation/runtime.js", () => ({
}));
vi.mock("../tts/tts.js", () => ({
getTtsPersona: vi.fn(() => undefined),
getTtsProvider: vi.fn(() => "openai"),
listTtsPersonas: vi.fn(() => []),
listSpeechVoices: vi.fn(async () => []),
resolveTtsConfig: vi.fn(() => ({})),
resolveTtsPrefsPath: vi.fn(() => "/tmp/tts.json"),
setTtsEnabled: vi.fn(),
setTtsPersona: mocks.setTtsPersona as typeof import("../tts/tts.js").setTtsPersona,
setTtsProvider: mocks.setTtsProvider as typeof import("../tts/tts.js").setTtsProvider,
resolveExplicitTtsOverrides:
mocks.resolveExplicitTtsOverrides as typeof import("../tts/tts.js").resolveExplicitTtsOverrides,

View File

@@ -56,11 +56,14 @@ import { theme } from "../terminal/theme.js";
import { canonicalizeSpeechProviderId, listSpeechProviders } from "../tts/provider-registry.js";
import {
getTtsProvider,
getTtsPersona,
listTtsPersonas,
listSpeechVoices,
resolveExplicitTtsOverrides,
resolveTtsConfig,
resolveTtsPrefsPath,
setTtsEnabled,
setTtsPersona,
setTtsProvider,
textToSpeech,
} from "../tts/tts.js";
@@ -256,6 +259,13 @@ const CAPABILITY_METADATA: CapabilityMetadata[] = [
flags: ["--local", "--gateway", "--json"],
resultShape: "provider ids, configured state, models, voices",
},
{
id: "tts.personas",
description: "List TTS personas.",
transports: ["local", "gateway"],
flags: ["--local", "--gateway", "--json"],
resultShape: "persona ids, labels, providers, active persona",
},
{
id: "tts.status",
description: "Show gateway-managed TTS state.",
@@ -284,6 +294,13 @@ const CAPABILITY_METADATA: CapabilityMetadata[] = [
flags: ["--provider", "--local", "--gateway", "--json"],
resultShape: "selected provider",
},
{
id: "tts.set-persona",
description: "Set the active TTS persona.",
transports: ["local", "gateway"],
flags: ["--persona", "--off", "--local", "--gateway", "--json"],
resultShape: "selected persona",
},
{
id: "video.generate",
description: "Generate video files with configured video providers.",
@@ -1181,6 +1198,30 @@ async function runTtsProviders(transport: CapabilityTransport) {
};
}
/**
 * Lists the configured TTS personas together with the id of the active one.
 *
 * On the gateway transport the `tts.personas` gateway method is called and its
 * payload is returned unchanged. Otherwise the result is built from the locally
 * loaded config and prefs file.
 *
 * @param transport Transport chosen by the CLI command.
 * @returns The gateway payload, or `{ active, personas }` where `active` is the
 *   persona id or `null`, and each persona's `providers` is the list of provider
 *   ids that have a binding.
 */
async function runTtsPersonas(transport: CapabilityTransport) {
  if (transport !== "gateway") {
    const ttsConfig = resolveTtsConfig(loadConfig());
    const activePersona = getTtsPersona(ttsConfig, resolveTtsPrefsPath(ttsConfig));
    const personas = listTtsPersonas(ttsConfig).map(
      ({ id, label, description, provider, fallbackPolicy, providers }) => ({
        id,
        label,
        description,
        provider,
        fallbackPolicy,
        // Only the bound provider ids are exposed, never the binding contents.
        providers: Object.keys(providers ?? {}),
      }),
    );
    return { active: activePersona?.id ?? null, personas };
  }

  return await callGateway({
    method: "tts.personas",
    timeoutMs: 30_000,
  });
}
async function runTtsVoices(providerRaw?: string) {
const cfg = loadConfig();
const config = resolveTtsConfig(cfg);
@@ -1194,9 +1235,10 @@ async function runTtsVoices(providerRaw?: string) {
}
async function runTtsStateMutation(params: {
capability: "tts.enable" | "tts.disable" | "tts.set-provider";
capability: "tts.enable" | "tts.disable" | "tts.set-provider" | "tts.set-persona";
transport: CapabilityTransport;
provider?: string;
persona?: string | null;
}) {
if (params.transport === "gateway") {
const method =
@@ -1204,10 +1246,17 @@ async function runTtsStateMutation(params: {
? "tts.enable"
: params.capability === "tts.disable"
? "tts.disable"
: "tts.setProvider";
: params.capability === "tts.set-provider"
? "tts.setProvider"
: "tts.setPersona";
const payload = await callGateway({
method,
params: params.provider ? { provider: params.provider } : undefined,
params:
params.capability === "tts.set-provider"
? { provider: params.provider }
: params.capability === "tts.set-persona"
? { persona: params.persona ?? "off" }
: undefined,
timeoutMs: 30_000,
});
return payload;
@@ -1224,6 +1273,20 @@ async function runTtsStateMutation(params: {
setTtsEnabled(prefsPath, false);
return { enabled: false };
}
if (params.capability === "tts.set-persona") {
  // Normalize once instead of once per persona entry inside find().
  const requested = normalizeLowercaseStringOrEmpty(params.persona ?? "");
  // `--off` arrives as null. The keyword aliases match the gateway `tts.setPersona`
  // handler and the `/tts persona` chat command, so `--persona off` behaves the same
  // on the local and gateway transports instead of failing as an unknown persona.
  const disable =
    !params.persona || requested === "off" || requested === "none" || requested === "default";
  if (disable) {
    setTtsPersona(prefsPath, null);
    return { persona: null };
  }
  const persona = listTtsPersonas(config).find((entry) => entry.id === requested);
  if (!persona) {
    // Report the id as the user typed it, not the normalized form.
    throw new Error(`Unknown TTS persona: ${params.persona}`);
  }
  setTtsPersona(prefsPath, persona.id);
  return { persona: persona.id };
}
if (!params.provider) {
throw new Error("--provider is required");
}
@@ -1746,6 +1809,27 @@ export function registerCapabilityCli(program: Command) {
});
});
// `tts personas`: read-only persona listing. The default transport is "local";
// --local / --gateway are forwarded to resolveTransport.
tts
.command("personas")
.description("List TTS personas")
.option("--local", "Force local execution", false)
.option("--gateway", "Force gateway execution", false)
.option("--json", "Output JSON", false)
.action(async (opts) => {
await runCommandWithRuntime(defaultRuntime, async () => {
const transport = resolveTransport({
local: Boolean(opts.local),
gateway: Boolean(opts.gateway),
supported: ["local", "gateway"],
defaultTransport: "local",
});
const result = await runTtsPersonas(transport);
// The text formatter is pretty-printed JSON as well.
emitJsonOrText(defaultRuntime, Boolean(opts.json), result, (value) =>
JSON.stringify(value, null, 2),
);
});
});
tts
.command("status")
.description("Show TTS status")
@@ -1823,6 +1907,36 @@ export function registerCapabilityCli(program: Command) {
});
});
// `tts set-persona`: selects a persona with --persona <id> or clears it with --off.
// Unlike `tts personas`, the default transport here is "gateway".
tts
.command("set-persona")
.description("Set the active TTS persona")
.option("--persona <id>", "TTS persona id")
.option("--off", "Disable the active TTS persona", false)
.option("--local", "Force local execution", false)
.option("--gateway", "Force gateway execution", false)
.option("--json", "Output JSON", false)
.action(async (opts) => {
await runCommandWithRuntime(defaultRuntime, async () => {
const transport = resolveTransport({
local: Boolean(opts.local),
gateway: Boolean(opts.gateway),
supported: ["local", "gateway"],
defaultTransport: "gateway",
});
// One of the two selectors must be present.
if (!opts.off && !opts.persona) {
throw new Error("--persona is required unless --off is set");
}
const result = await runTtsStateMutation({
capability: "tts.set-persona",
// --off takes precedence when both flags are given; null means "no persona".
persona: opts.off ? null : String(opts.persona),
transport,
});
emitJsonOrText(defaultRuntime, Boolean(opts.json), result, (value) =>
JSON.stringify(value, null, 2),
);
});
});
const video = capability.command("video").description("Video generation and description");
video

View File

@@ -19116,6 +19116,222 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
type: "string",
minLength: 1,
},
persona: {
type: "string",
title: "TTS Persona",
description:
"Default TTS persona id. Local TTS persona preferences can override this per host.",
},
personas: {
type: "object",
propertyNames: {
type: "string",
},
additionalProperties: {
type: "object",
properties: {
label: {
type: "string",
},
description: {
type: "string",
},
provider: {
type: "string",
minLength: 1,
},
fallbackPolicy: {
anyOf: [
{
type: "string",
const: "preserve-persona",
},
{
type: "string",
const: "provider-defaults",
},
{
type: "string",
const: "fail",
},
],
},
prompt: {
type: "object",
properties: {
profile: {
type: "string",
},
scene: {
type: "string",
},
sampleContext: {
type: "string",
},
style: {
type: "string",
},
accent: {
type: "string",
},
pacing: {
type: "string",
},
constraints: {
type: "array",
items: {
type: "string",
},
},
},
additionalProperties: false,
title: "TTS Persona Prompt",
description:
"Provider-neutral persona prompt intent. Providers decide whether and how to map this into request instructions.",
},
rewrite: {
type: "object",
properties: {
enabled: {
type: "boolean",
},
model: {
type: "string",
},
preserveMeaning: {
type: "boolean",
},
compressForSpeech: {
type: "boolean",
},
inCharacter: {
type: "boolean",
},
maxChars: {
type: "integer",
minimum: 1,
maximum: 9007199254740991,
},
},
additionalProperties: false,
},
providers: {
type: "object",
propertyNames: {
type: "string",
},
additionalProperties: {
type: "object",
properties: {
apiKey: {
anyOf: [
{
type: "string",
},
{
oneOf: [
{
type: "object",
properties: {
source: {
type: "string",
const: "env",
},
provider: {
type: "string",
pattern: "^[a-z][a-z0-9_-]{0,63}$",
},
id: {
type: "string",
pattern: "^[A-Z][A-Z0-9_]{0,127}$",
},
},
required: ["source", "provider", "id"],
additionalProperties: false,
},
{
type: "object",
properties: {
source: {
type: "string",
const: "file",
},
provider: {
type: "string",
pattern: "^[a-z][a-z0-9_-]{0,63}$",
},
id: {
type: "string",
},
},
required: ["source", "provider", "id"],
additionalProperties: false,
},
{
type: "object",
properties: {
source: {
type: "string",
const: "exec",
},
provider: {
type: "string",
pattern: "^[a-z][a-z0-9_-]{0,63}$",
},
id: {
type: "string",
},
},
required: ["source", "provider", "id"],
additionalProperties: false,
},
],
},
],
},
},
additionalProperties: {
anyOf: [
{
type: "string",
},
{
type: "number",
},
{
type: "boolean",
},
{
type: "null",
},
{
type: "array",
items: {},
},
{
type: "object",
propertyNames: {
type: "string",
},
additionalProperties: {},
},
],
},
},
title: "TTS Persona Provider Bindings",
description:
"Provider-specific TTS persona bindings keyed by speech provider id. These merge over messages.tts.providers for the active persona.",
},
},
additionalProperties: false,
title: "TTS Persona",
description:
"One TTS persona. Use provider-specific bindings for exact voices/models and prompt templates.",
},
title: "TTS Personas",
description:
"Named TTS personas that define stable spoken identity plus provider-specific speech bindings.",
},
summaryModel: {
type: "string",
},
@@ -27520,6 +27736,31 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Text-to-speech policy for reading agent replies aloud on supported voice or audio surfaces. Keep disabled unless voice playback is part of your operator/user workflow.",
tags: ["media"],
},
"messages.tts.persona": {
label: "TTS Persona",
help: "Default TTS persona id. Local TTS persona preferences can override this per host.",
tags: ["media"],
},
"messages.tts.personas": {
label: "TTS Personas",
help: "Named TTS personas that define stable spoken identity plus provider-specific speech bindings.",
tags: ["media"],
},
"messages.tts.personas.*": {
label: "TTS Persona",
help: "One TTS persona. Use provider-specific bindings for exact voices/models and prompt templates.",
tags: ["media"],
},
"messages.tts.personas.*.prompt": {
label: "TTS Persona Prompt",
help: "Provider-neutral persona prompt intent. Providers decide whether and how to map this into request instructions.",
tags: ["media"],
},
"messages.tts.personas.*.providers": {
label: "TTS Persona Provider Bindings",
help: "Provider-specific TTS persona bindings keyed by speech provider id. These merge over messages.tts.providers for the active persona.",
tags: ["media"],
},
"messages.tts.providers": {
label: "TTS Provider Settings",
help: "Provider-specific TTS settings keyed by speech provider id. Use this instead of bundled provider-specific top-level keys so speech plugins stay decoupled from core config schema.",
@@ -28081,6 +28322,10 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
sensitive: true,
tags: ["security", "media", "tools"],
},
"messages.tts.personas.*.providers.*.apiKey": {
sensitive: true,
tags: ["security", "auth", "media"],
},
"mcp.servers.*.headers.*": {
sensitive: true,
tags: ["security"],

View File

@@ -1589,6 +1589,16 @@ export const FIELD_HELP: Record<string, string> = {
"Removes the acknowledgment reaction after final reply delivery when enabled. Keep enabled for cleaner UX in channels where persistent ack reactions create clutter.",
"messages.tts":
"Text-to-speech policy for reading agent replies aloud on supported voice or audio surfaces. Keep disabled unless voice playback is part of your operator/user workflow.",
"messages.tts.persona":
"Default TTS persona id. Local TTS persona preferences can override this per host.",
"messages.tts.personas":
"Named TTS personas that define stable spoken identity plus provider-specific speech bindings.",
"messages.tts.personas.*":
"One TTS persona. Use provider-specific bindings for exact voices/models and prompt templates.",
"messages.tts.personas.*.prompt":
"Provider-neutral persona prompt intent. Providers decide whether and how to map this into request instructions.",
"messages.tts.personas.*.providers":
"Provider-specific TTS persona bindings keyed by speech provider id. These merge over messages.tts.providers for the active persona.",
"messages.tts.providers":
"Provider-specific TTS settings keyed by speech provider id. Use this instead of bundled provider-specific top-level keys so speech plugins stay decoupled from core config schema.",
"messages.tts.providers.*":

View File

@@ -820,6 +820,11 @@ export const FIELD_LABELS: Record<string, string> = {
"messages.inbound.debounceMs": "Inbound Message Debounce (ms)",
"messages.inbound.byChannel": "Inbound Debounce by Channel (ms)",
"messages.tts": "Message Text-to-Speech",
"messages.tts.persona": "TTS Persona",
"messages.tts.personas": "TTS Personas",
"messages.tts.personas.*": "TTS Persona",
"messages.tts.personas.*.prompt": "TTS Persona Prompt",
"messages.tts.personas.*.providers": "TTS Persona Provider Bindings",
"messages.tts.providers": "TTS Provider Settings",
"messages.tts.providers.*": "TTS Provider Config",
"messages.tts.providers.*.apiKey": "TTS Provider API Key", // pragma: allowlist secret

View File

@@ -25,6 +25,43 @@ export type TtsModelOverrideConfig = {
export type TtsProviderConfigMap = Record<string, Record<string, unknown>>;
/**
 * Values accepted for a persona's `fallbackPolicy`.
 * NOTE(review): presumably controls how the persona is treated when synthesis falls
 * back to another provider — confirm against the TTS runtime.
 */
export type TtsPersonaFallbackPolicy = "preserve-persona" | "provider-defaults" | "fail";
/**
 * Provider-neutral persona prompt intent. Providers decide whether and how to map
 * this into request instructions. Every field is optional.
 */
export type TtsPersonaPromptConfig = {
profile?: string;
scene?: string;
sampleContext?: string;
style?: string;
accent?: string;
pacing?: string;
constraints?: string[];
};
/**
 * Optional rewrite settings attached to a persona (`TtsPersonaConfig.rewrite`).
 * NOTE(review): the consumer of these flags is not visible here — confirm how
 * `enabled`, `model` and `maxChars` are applied before relying on them.
 */
export type TtsPersonaRewriteConfig = {
enabled?: boolean;
model?: string;
preserveMeaning?: boolean;
compressForSpeech?: boolean;
inCharacter?: boolean;
maxChars?: number;
};
/**
 * One TTS persona as written under `TtsConfig.personas`. Use the provider-specific
 * bindings in `providers` for exact voices/models and prompt templates.
 */
export type TtsPersonaConfig = {
label?: string;
description?: string;
/** Preferred provider for this persona. Explicit provider prefs still win. */
provider?: TtsProvider;
fallbackPolicy?: TtsPersonaFallbackPolicy;
prompt?: TtsPersonaPromptConfig;
rewrite?: TtsPersonaRewriteConfig;
/** Provider-specific persona bindings keyed by speech provider id. */
providers?: TtsProviderConfigMap;
};
/**
 * A persona config paired with its id.
 * NOTE(review): `id` is presumably the key under `TtsConfig.personas` — confirm in the resolver.
 */
export type ResolvedTtsPersona = TtsPersonaConfig & {
id: string;
};
export type TtsConfig = {
/** Auto-TTS mode (preferred). */
auto?: TtsAutoMode;
@@ -34,6 +71,10 @@ export type TtsConfig = {
mode?: TtsMode;
/** Primary TTS provider (fallbacks are automatic). */
provider?: TtsProvider;
/** Active TTS persona id. */
persona?: string;
/** Named TTS personas. */
personas?: Record<string, TtsPersonaConfig>;
/** Optional model override for TTS auto-summary (provider/model or alias). */
summaryModel?: string;
/** Allow the model to override TTS parameters. */

View File

@@ -497,12 +497,48 @@ const TtsProviderConfigSchema = z
z.record(z.string(), z.unknown()),
]),
);
// Validates a persona's `prompt` object. Every field is optional; `.strict()` rejects unknown keys.
const TtsPersonaPromptSchema = z
.object({
profile: z.string().optional(),
scene: z.string().optional(),
sampleContext: z.string().optional(),
style: z.string().optional(),
accent: z.string().optional(),
pacing: z.string().optional(),
constraints: z.array(z.string()).optional(),
})
.strict();
// Validates a persona's `rewrite` object. Every field is optional; `.strict()` rejects unknown keys.
const TtsPersonaRewriteSchema = z
.object({
enabled: z.boolean().optional(),
model: z.string().optional(),
preserveMeaning: z.boolean().optional(),
compressForSpeech: z.boolean().optional(),
inCharacter: z.boolean().optional(),
// Must be a positive integer when present.
maxChars: z.number().int().min(1).optional(),
})
.strict();
// Validates one persona entry; used as the value schema of `personas` in TtsConfigSchema.
// `.strict()` rejects unknown keys at the persona level.
const TtsPersonaSchema = z
.object({
label: z.string().optional(),
description: z.string().optional(),
provider: TtsProviderSchema.optional(),
// Kept as a union of literals; the three values match TtsPersonaFallbackPolicy.
fallbackPolicy: z
.union([z.literal("preserve-persona"), z.literal("provider-defaults"), z.literal("fail")])
.optional(),
prompt: TtsPersonaPromptSchema.optional(),
rewrite: TtsPersonaRewriteSchema.optional(),
// Provider bindings keyed by provider id; each value is validated by TtsProviderConfigSchema.
providers: z.record(z.string(), TtsProviderConfigSchema).optional(),
})
.strict();
export const TtsConfigSchema = z
.object({
auto: TtsAutoSchema.optional(),
enabled: z.boolean().optional(),
mode: TtsModeSchema.optional(),
provider: TtsProviderSchema.optional(),
persona: z.string().optional(),
personas: z.record(z.string(), TtsPersonaSchema).optional(),
summaryModel: z.string().optional(),
modelOverrides: z
.object({

View File

@@ -39,4 +39,47 @@ describe("TtsConfigSchema openai speed and instructions", () => {
}),
).not.toThrow();
});
// Happy-path parse of a fully populated persona: label/description, preferred provider,
// fallback policy, every prompt field, rewrite options and two provider bindings.
it("accepts provider-specific persona bindings and structured prompt fields", () => {
expect(() =>
TtsConfigSchema.parse({
persona: "alfred",
personas: {
alfred: {
label: "Alfred",
description: "Dry, warm British butler narrator.",
provider: "google",
fallbackPolicy: "preserve-persona",
prompt: {
profile: "A brilliant British butler.",
scene: "A quiet late-night study.",
sampleContext: "The speaker is answering a trusted operator.",
style: "Refined and lightly amused.",
accent: "British English.",
pacing: "Measured.",
constraints: ["Do not read configuration values aloud."],
},
rewrite: {
enabled: false,
preserveMeaning: true,
compressForSpeech: true,
maxChars: 1500,
},
// Binding keys differ per provider (voiceName/promptTemplate vs voice/instructions).
providers: {
google: {
model: "gemini-3.1-flash-tts-preview",
voiceName: "Algieba",
promptTemplate: "audio-profile-v1",
},
openai: {
model: "gpt-4o-mini-tts",
voice: "cedar",
instructions: "Speak with dry warmth.",
},
},
},
},
}),
).not.toThrow();
});
});

View File

@@ -78,6 +78,7 @@ const METHOD_SCOPE_GROUPS: Record<OperatorScope, readonly string[]> = {
"usage.cost",
"tts.status",
"tts.providers",
"tts.personas",
"commands.list",
"models.list",
"models.authStatus",
@@ -131,6 +132,7 @@ const METHOD_SCOPE_GROUPS: Record<OperatorScope, readonly string[]> = {
"tts.disable",
"tts.convert",
"tts.setProvider",
"tts.setPersona",
"voicewake.set",
"node.invoke",
"chat.send",

View File

@@ -20,10 +20,12 @@ const BASE_METHODS = [
"usage.cost",
"tts.status",
"tts.providers",
"tts.personas",
"tts.enable",
"tts.disable",
"tts.convert",
"tts.setProvider",
"tts.setPersona",
"config.get",
"config.set",
"config.apply",

View File

@@ -25,9 +25,11 @@ vi.mock("../../tts/provider-registry.js", () => ({
vi.mock("../../tts/tts.js", () => ({
getResolvedSpeechProviderConfig: vi.fn(),
getTtsPersona: vi.fn(() => undefined),
getTtsProvider: vi.fn(() => "openai"),
isTtsEnabled: vi.fn(() => true),
isTtsProviderConfigured: vi.fn(() => true),
listTtsPersonas: vi.fn(() => []),
resolveExplicitTtsOverrides:
mocks.resolveExplicitTtsOverrides as typeof import("../../tts/tts.js").resolveExplicitTtsOverrides,
resolveTtsAutoMode: vi.fn(() => false),
@@ -35,6 +37,7 @@ vi.mock("../../tts/tts.js", () => ({
resolveTtsPrefsPath: vi.fn(() => "/tmp/tts.json"),
resolveTtsProviderOrder: vi.fn(() => ["openai"]),
setTtsEnabled: vi.fn(),
setTtsPersona: vi.fn(),
setTtsProvider: vi.fn(),
textToSpeech: mocks.textToSpeech as typeof import("../../tts/tts.js").textToSpeech,
}));

View File

@@ -7,15 +7,18 @@ import {
} from "../../tts/provider-registry.js";
import {
getResolvedSpeechProviderConfig,
getTtsPersona,
getTtsProvider,
isTtsEnabled,
isTtsProviderConfigured,
listTtsPersonas,
resolveExplicitTtsOverrides,
resolveTtsAutoMode,
resolveTtsConfig,
resolveTtsPrefsPath,
resolveTtsProviderOrder,
setTtsEnabled,
setTtsPersona,
setTtsProvider,
textToSpeech,
} from "../../tts/tts.js";
@@ -30,6 +33,7 @@ export const ttsHandlers: GatewayRequestHandlers = {
const config = resolveTtsConfig(cfg);
const prefsPath = resolveTtsPrefsPath(config);
const provider = getTtsProvider(config, prefsPath);
const persona = getTtsPersona(config, prefsPath);
const autoMode = resolveTtsAutoMode({ config, prefsPath });
const fallbackProviders = resolveTtsProviderOrder(provider, cfg)
.slice(1)
@@ -47,6 +51,13 @@ export const ttsHandlers: GatewayRequestHandlers = {
enabled: isTtsEnabled(config, prefsPath),
auto: autoMode,
provider,
persona: persona?.id ?? null,
personas: listTtsPersonas(config).map((entry) => ({
id: entry.id,
label: entry.label,
description: entry.description,
provider: entry.provider,
})),
fallbackProvider: fallbackProviders[0] ?? null,
fallbackProviders,
prefsPath,
@@ -157,6 +168,58 @@ export const ttsHandlers: GatewayRequestHandlers = {
respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err)));
}
},
// Gateway method `tts.personas`: responds with the active persona id (or null) and a
// summary of every configured persona. Any thrown error is reported as UNAVAILABLE.
"tts.personas": async ({ respond }) => {
  try {
    const ttsConfig = resolveTtsConfig(loadConfig());
    const activePersona = getTtsPersona(ttsConfig, resolveTtsPrefsPath(ttsConfig));
    const personas = listTtsPersonas(ttsConfig).map(
      ({ id, label, description, provider, fallbackPolicy, providers }) => ({
        id,
        label,
        description,
        provider,
        fallbackPolicy,
        // Only the bound provider ids are exposed, never the binding contents.
        providers: Object.keys(providers ?? {}),
      }),
    );
    respond(true, { active: activePersona?.id ?? null, personas });
  } catch (err) {
    respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err)));
  }
},
// Gateway method `tts.setPersona`: stores or clears the persona preference.
//
// params.persona:
//   - missing/empty, or "off" / "none" / "default" (case-insensitive): clears the
//     preference and responds with { persona: null }.
//   - otherwise: must equal a configured persona id after lowercasing; responds with
//     { persona: <id> }, or INVALID_REQUEST when no persona matches.
// Any thrown error, including a config load failure, is reported as UNAVAILABLE.
"tts.setPersona": async ({ params, respond }) => {
  try {
    // Config loading and param normalization stay inside the try so a failure still
    // produces a response, matching the tts.personas handler.
    const cfg = loadConfig();
    const rawPersona = normalizeOptionalString(params.persona);
    // Lowercase once; reused for the keyword check and the id lookup.
    const requested = rawPersona?.toLowerCase();
    const config = resolveTtsConfig(cfg);
    const prefsPath = resolveTtsPrefsPath(config);
    if (!requested || ["off", "none", "default"].includes(requested)) {
      setTtsPersona(prefsPath, null);
      respond(true, { persona: null });
      return;
    }
    const persona = listTtsPersonas(config).find((entry) => entry.id === requested);
    if (!persona) {
      respond(
        false,
        undefined,
        errorShape(
          ErrorCodes.INVALID_REQUEST,
          "Invalid persona. Use a configured TTS persona id.",
        ),
      );
      return;
    }
    setTtsPersona(prefsPath, persona.id);
    respond(true, { persona: persona.id });
  } catch (err) {
    respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err)));
  }
},
"tts.providers": async ({ respond }) => {
try {
const cfg = loadConfig();

View File

@@ -133,10 +133,15 @@ export type {
TelegramInlineButtonsScope,
TelegramNetworkConfig,
TelegramTopicConfig,
ResolvedTtsPersona,
TtsAutoMode,
TtsConfig,
TtsMode,
TtsModelOverrideConfig,
TtsPersonaConfig,
TtsPersonaFallbackPolicy,
TtsPersonaPromptConfig,
TtsPersonaRewriteConfig,
TtsProvider,
} from "../config/types.js";
export {

View File

@@ -9,11 +9,14 @@ export type {
SpeechModelOverridePolicy,
SpeechProviderConfig,
SpeechProviderConfiguredContext,
SpeechProviderPreparedSynthesis,
SpeechProviderPrepareSynthesisContext,
SpeechProviderResolveConfigContext,
SpeechProviderResolveTalkConfigContext,
SpeechProviderResolveTalkOverridesContext,
SpeechProviderOverrides,
SpeechSynthesisRequest,
SpeechSynthesisTarget,
SpeechTelephonySynthesisRequest,
SpeechVoiceOption,
TtsDirectiveOverrides,
@@ -35,6 +38,7 @@ export {
listSpeechProviders,
normalizeSpeechProviderId,
} from "../tts/provider-registry.js";
export { resolveEffectiveTtsConfig } from "../tts/tts-config.js";
export { normalizeTtsAutoMode, TTS_AUTO_MODES } from "../tts/tts-auto-mode.js";
export {
asBoolean,

View File

@@ -12,11 +12,14 @@ export type {
SpeechModelOverridePolicy,
SpeechProviderConfig,
SpeechProviderConfiguredContext,
SpeechProviderPreparedSynthesis,
SpeechProviderPrepareSynthesisContext,
SpeechProviderResolveConfigContext,
SpeechProviderResolveTalkConfigContext,
SpeechProviderResolveTalkOverridesContext,
SpeechProviderOverrides,
SpeechSynthesisRequest,
SpeechSynthesisTarget,
SpeechTelephonySynthesisRequest,
SpeechVoiceOption,
TtsDirectiveOverrides,

View File

@@ -40,6 +40,10 @@ export const getTtsMaxLength: FacadeModule["getTtsMaxLength"] = createLazyFacade
loadFacadeModule,
"getTtsMaxLength",
);
/** Facade binding for `getTtsPersona`, obtained from `loadFacadeModule` via `createLazyFacadeRuntimeValue`. */
export const getTtsPersona: FacadeModule["getTtsPersona"] = createLazyFacadeRuntimeValue(
loadFacadeModule,
"getTtsPersona",
);
export const getTtsProvider: FacadeModule["getTtsProvider"] = createLazyFacadeRuntimeValue(
loadFacadeModule,
"getTtsProvider",
@@ -56,6 +60,10 @@ export const listSpeechVoices: FacadeModule["listSpeechVoices"] = createLazyFaca
loadFacadeModule,
"listSpeechVoices",
);
/** Facade binding for `listTtsPersonas`, obtained from `loadFacadeModule` via `createLazyFacadeRuntimeValue`. */
export const listTtsPersonas: FacadeModule["listTtsPersonas"] = createLazyFacadeRuntimeValue(
loadFacadeModule,
"listTtsPersonas",
);
export const maybeApplyTtsToPayload: FacadeModule["maybeApplyTtsToPayload"] =
createLazyFacadeRuntimeValue(loadFacadeModule, "maybeApplyTtsToPayload");
export const resolveExplicitTtsOverrides: FacadeModule["resolveExplicitTtsOverrides"] =
@@ -90,6 +98,10 @@ export const setTtsMaxLength: FacadeModule["setTtsMaxLength"] = createLazyFacade
loadFacadeModule,
"setTtsMaxLength",
);
/** Facade binding for `setTtsPersona`, obtained from `loadFacadeModule` via `createLazyFacadeRuntimeValue`. */
export const setTtsPersona: FacadeModule["setTtsPersona"] = createLazyFacadeRuntimeValue(
loadFacadeModule,
"setTtsPersona",
);
export const setTtsProvider: FacadeModule["setTtsProvider"] = createLazyFacadeRuntimeValue(
loadFacadeModule,
"setTtsProvider",

View File

@@ -1,5 +1,5 @@
import type { OpenClawConfig } from "../config/types.openclaw.js";
import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js";
import type { ResolvedTtsPersona, TtsAutoMode, TtsProvider } from "../config/types.tts.js";
import type {
SpeechProviderConfig,
SpeechVoiceOption,
@@ -24,6 +24,8 @@ export type TtsProviderAttempt = {
provider: string;
outcome: "success" | "skipped" | "failed";
reasonCode: TtsAttemptReasonCode;
persona?: string;
personaBinding?: "applied" | "missing" | "none";
latencyMs?: number;
error?: string;
};
@@ -34,6 +36,7 @@ export type TtsStatusEntry = {
textLength: number;
summarized: boolean;
provider?: string;
persona?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
@@ -126,6 +129,7 @@ export type TtsResult = {
error?: string;
latencyMs?: number;
provider?: string;
persona?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
@@ -141,6 +145,7 @@ export type TtsSynthesisResult = {
error?: string;
latencyMs?: number;
provider?: string;
persona?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
@@ -156,6 +161,7 @@ export type TtsTelephonyResult = {
error?: string;
latencyMs?: number;
provider?: string;
persona?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
@@ -179,6 +185,7 @@ export type TtsRuntimeFacade = {
cfg?: OpenClawConfig,
) => SpeechProviderConfig;
getTtsMaxLength: (prefsPath: string) => number;
getTtsPersona: (config: ResolvedTtsConfig, prefsPath: string) => ResolvedTtsPersona | undefined;
getTtsProvider: (config: ResolvedTtsConfig, prefsPath: string) => TtsProvider;
isSummarizationEnabled: (prefsPath: string) => boolean;
isTtsEnabled: (config: ResolvedTtsConfig, prefsPath: string, sessionAuto?: string) => boolean;
@@ -188,6 +195,7 @@ export type TtsRuntimeFacade = {
cfg?: OpenClawConfig,
) => boolean;
listSpeechVoices: ListSpeechVoices;
listTtsPersonas: (config: ResolvedTtsConfig) => ResolvedTtsPersona[];
maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>;
resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides;
resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode;
@@ -199,6 +207,7 @@ export type TtsRuntimeFacade = {
setTtsAutoMode: (prefsPath: string, mode: TtsAutoMode) => void;
setTtsEnabled: (prefsPath: string, enabled: boolean) => void;
setTtsMaxLength: (prefsPath: string, maxLength: number) => void;
setTtsPersona: (prefsPath: string, persona: string | null | undefined) => void;
setTtsProvider: (prefsPath: string, provider: TtsProvider) => void;
synthesizeSpeech: (params: TtsRequestParams) => Promise<TtsSynthesisResult>;
textToSpeech: TextToSpeech;

View File

@@ -65,6 +65,8 @@ import type {
SpeechProviderResolveTalkConfigContext,
SpeechProviderResolveTalkOverridesContext,
SpeechListVoicesRequest,
SpeechProviderPrepareSynthesisContext,
SpeechProviderPreparedSynthesis,
SpeechProviderId,
SpeechSynthesisRequest,
SpeechSynthesisResult,
@@ -1724,6 +1726,12 @@ export type SpeechProviderPlugin = {
resolveTalkOverrides?: (
ctx: SpeechProviderResolveTalkOverridesContext,
) => SpeechProviderConfig | undefined;
prepareSynthesis?: (
ctx: SpeechProviderPrepareSynthesisContext,
) =>
| SpeechProviderPreparedSynthesis
| undefined
| Promise<SpeechProviderPreparedSynthesis | undefined>;
isConfigured: (ctx: SpeechProviderConfiguredContext) => boolean;
synthesize: (req: SpeechSynthesisRequest) => Promise<SpeechSynthesisResult>;
synthesizeTelephony?: (

View File

@@ -465,6 +465,9 @@ const formatVoiceModeLine = (
return null;
}
const parts = [`🔊 Voice: ${snapshot.autoMode}`, `provider=${snapshot.provider}`];
if (snapshot.persona) {
parts.push(`persona=${snapshot.persona}`);
}
if (snapshot.displayName) {
parts.push(`name=${snapshot.displayName}`);
}

View File

@@ -1,9 +1,10 @@
import type { TalkProviderConfig } from "../config/types.gateway.js";
import type { OpenClawConfig } from "../config/types.js";
import type { ResolvedTtsPersona } from "../config/types.tts.js";
export type SpeechProviderId = string;
export type SpeechSynthesisTarget = "audio-file" | "voice-note";
export type SpeechSynthesisTarget = "audio-file" | "voice-note" | "telephony";
export type SpeechProviderConfig = Record<string, unknown>;
@@ -69,6 +70,23 @@ export type SpeechTelephonySynthesisResult = {
sampleRate: number;
};
/**
 * Context object handed to a speech provider's optional `prepareSynthesis` hook.
 * Field notes marked "presumably" are inferred from names and types only; the code
 * that builds this object is not visible here.
 */
export type SpeechProviderPrepareSynthesisContext = {
// Text of the synthesis request (presumably the text about to be spoken — confirm with the caller).
text: string;
// Application configuration object.
cfg: OpenClawConfig;
// Provider configuration record (presumably the one resolved for this provider — confirm).
providerConfig: SpeechProviderConfig;
// Optional provider overrides for this request.
providerOverrides?: SpeechProviderOverrides;
// Resolved persona, when one is present for this request.
persona?: ResolvedTtsPersona;
// Optional provider configuration associated with the persona
// (presumably the persona's binding for this provider — confirm).
personaProviderConfig?: SpeechProviderConfig;
// Which kind of output is being synthesized.
target: SpeechSynthesisTarget;
// Timeout for the request (presumably milliseconds, per the name).
timeoutMs: number;
};
/**
 * Value a provider's optional `prepareSynthesis` hook may return. Every field is
 * optional, and the hook is also allowed to return `undefined`.
 * NOTE(review): presumably a field that is present replaces the matching value from
 * the hook's context for the synthesis call — confirm with the code that consumes this.
 */
export type SpeechProviderPreparedSynthesis = {
// Optional replacement text.
text?: string;
// Optional replacement provider configuration.
providerConfig?: SpeechProviderConfig;
// Optional replacement provider overrides.
providerOverrides?: SpeechProviderOverrides;
};
export type SpeechVoiceOption = {
id: string;
name?: string;

View File

@@ -138,6 +138,44 @@ describe("resolveStatusTtsSnapshot", () => {
});
});
it("reports per-agent persona provider over global persona", async () => {
  await withStatusTempHome(async () => {
    // The global default persona is "alfred"; the "reader" agent overrides it with "jarvis".
    const cfg = {
      messages: {
        tts: {
          auto: "always",
          persona: "alfred",
          personas: {
            alfred: { provider: "google" },
            jarvis: { provider: "edge" },
          },
        },
      },
      agents: {
        list: [{ id: "reader", tts: { persona: "jarvis" } }],
      },
    } as OpenClawConfig;

    const snapshot = resolveStatusTtsSnapshot({ cfg, agentId: "reader" });

    // The agent-level persona wins, and its "edge" provider id is reported under the
    // "microsoft" name that the status code maps it to.
    expect(snapshot).toEqual({
      autoMode: "always",
      provider: "microsoft",
      persona: "jarvis",
      maxLength: 1500,
      summarize: true,
    });
  });
});
it("reports configured OpenAI TTS model, voice, and sanitized custom endpoint", async () => {
await withStatusTempHome(async () => {
expect(

View File

@@ -20,6 +20,7 @@ type TtsUserPrefs = {
auto?: TtsAutoMode;
enabled?: boolean;
provider?: TtsProvider;
persona?: string | null;
maxLength?: number;
summarize?: boolean;
};
@@ -31,6 +32,7 @@ type TtsStatusSnapshot = {
displayName?: string;
model?: string;
voice?: string;
persona?: string;
baseUrl?: string;
customBaseUrl?: boolean;
maxLength: number;
@@ -51,6 +53,27 @@ function normalizeConfiguredSpeechProviderId(
return normalized === "edge" ? "microsoft" : normalized;
}
/**
 * Produces the canonical form of a persona id by delegating to
 * `normalizeOptionalLowercaseString`.
 *
 * @param personaId - Raw persona id; `null` and `undefined` are both accepted.
 * @returns Whatever the shared normalizer yields for the id; `null` is forwarded to
 *   it as `undefined`.
 */
function normalizeTtsPersonaId(personaId: string | null | undefined): string | undefined {
  // The shared normalizer is handed `undefined` in place of `null`.
  const candidate = personaId === null ? undefined : personaId;
  return normalizeOptionalLowercaseString(candidate);
}
/**
 * Looks up the provider configured on the persona whose normalized key equals
 * `personaId`.
 *
 * @param raw - TTS configuration whose `personas` map is searched.
 * @param personaId - Persona id to look for; compared against each key after the key
 *   has been passed through `normalizeTtsPersonaId`.
 * @returns The matched persona's provider, or `undefined` when no id is given, no
 *   personas are configured, nothing matches, or the provider normalizes to nothing.
 */
function resolvePersonaPreferredProvider(
  raw: TtsConfig,
  personaId: string | undefined,
): TtsProvider | undefined {
  const personas = raw.personas;
  if (!(personaId && personas)) {
    return undefined;
  }

  // Only the first entry whose normalized key matches is considered.
  const match = Object.entries(personas).find(([key]) => normalizeTtsPersonaId(key) === personaId);
  if (!match) {
    return undefined;
  }

  const [, persona] = match;
  // Prefer the configured-provider normalization; fall back to the raw value when it yields nothing.
  const canonical = normalizeConfiguredSpeechProviderId(persona.provider);
  return normalizeOptionalString(canonical ?? persona.provider);
}
function resolveTtsPrefsPathValue(prefsPath: string | undefined): string {
const configuredPath = normalizeOptionalString(prefsPath);
if (configuredPath) {
@@ -212,8 +235,13 @@ export function resolveStatusTtsSnapshot(params: {
return null;
}
const persona =
prefs.tts && Object.prototype.hasOwnProperty.call(prefs.tts, "persona")
? normalizeTtsPersonaId(prefs.tts.persona)
: normalizeTtsPersonaId(raw.persona);
const provider =
normalizeConfiguredSpeechProviderId(prefs.tts?.provider) ??
resolvePersonaPreferredProvider(raw, persona) ??
normalizeConfiguredSpeechProviderId(raw.provider) ??
"auto";
@@ -221,6 +249,7 @@ export function resolveStatusTtsSnapshot(params: {
autoMode,
provider,
...resolveStatusProviderDetails(raw, provider),
...(persona ? { persona } : {}),
maxLength: prefs.tts?.maxLength ?? DEFAULT_TTS_MAX_LENGTH,
summarize: prefs.tts?.summarize ?? DEFAULT_TTS_SUMMARIZE,
};

View File

@@ -1,5 +1,11 @@
import type { OpenClawConfig } from "../config/types.openclaw.js";
import type { TtsAutoMode, TtsConfig, TtsMode, TtsProvider } from "../config/types.tts.js";
import type {
ResolvedTtsPersona,
TtsAutoMode,
TtsConfig,
TtsMode,
TtsProvider,
} from "../config/types.tts.js";
import type { SpeechModelOverridePolicy, SpeechProviderConfig } from "./provider-types.js";
export type ResolvedTtsModelOverrides = SpeechModelOverridePolicy;
@@ -9,6 +15,8 @@ export type ResolvedTtsConfig = {
mode: TtsMode;
provider: TtsProvider;
providerSource: "config" | "default";
persona?: string;
personas: Record<string, ResolvedTtsPersona>;
summaryModel?: string;
modelOverrides: ResolvedTtsModelOverrides;
providerConfigs: Record<string, SpeechProviderConfig>;

View File

@@ -4,11 +4,13 @@ export {
getLastTtsAttempt,
getResolvedSpeechProviderConfig,
getTtsMaxLength,
getTtsPersona,
getTtsProvider,
isSummarizationEnabled,
isTtsEnabled,
isTtsProviderConfigured,
listSpeechVoices,
listTtsPersonas,
maybeApplyTtsToPayload,
resolveExplicitTtsOverrides,
resolveTtsAutoMode,
@@ -20,6 +22,7 @@ export {
setTtsAutoMode,
setTtsEnabled,
setTtsMaxLength,
setTtsPersona,
setTtsProvider,
synthesizeSpeech,
textToSpeech,