mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-07 07:58:36 +00:00
feat(tts): add per-agent voice overrides
This commit is contained in:
@@ -9,6 +9,9 @@ Docs: https://docs.openclaw.ai
|
||||
### Changes
|
||||
|
||||
- Plugins/tokenjuice: bump the bundled tokenjuice runtime to 0.6.3. Thanks @vincentkoc.
|
||||
- TTS/agents: allow `agents.list[].tts` to override global
|
||||
`messages.tts` for per-agent voices while keeping shared provider
|
||||
credentials and preferences in the existing TTS config surface.
|
||||
- Providers/Azure Speech: add Azure Speech as a bundled TTS provider with
|
||||
Speech-resource auth, voice listing, SSML escaping, native Ogg/Opus
|
||||
voice-note output, and telephony output. (#51776) Thanks @leonchui.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
211e9d4cdb309e7fe0c1ed91d060201240a9287f8c5cb3c893aba3f904a20d30 config-baseline.json
|
||||
ffda2d2911adc03148a368f3b40b17cbdcb7af0066bccdc555e8d596cdea8cda config-baseline.core.json
|
||||
3efb041739877bd5387ffc87e0ddd11be43d80d38e7779407ce8091dcb797e5e config-baseline.json
|
||||
5c6e35c5846f654d717d4b20853649e0b45a746423834f539b2a2223abcd5226 config-baseline.core.json
|
||||
7cd9c908f066c143eab2a201efbc9640f483ab28bba92ddeca1d18cc2b528bc3 config-baseline.channel.json
|
||||
9e131d7734f8b9cc9e7f8af6cc6b6dc81c9971dc551fadbe66fb0d682173f32d config-baseline.plugin.json
|
||||
a5479c182ec987bb21e814b8a4e7b3bda7190ae5c2b35fd5ca403dfa48afa115 config-baseline.plugin.json
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
c911117176b41eebf26470618274a7e093910e9b36855bc045bc8a92f6856745 plugin-sdk-api-baseline.json
|
||||
ff360635f95beb217b9dd207a87eaf331319a7671aea03acfe05911756741b21 plugin-sdk-api-baseline.jsonl
|
||||
6eb33044c2a4726f1aeb2d18052643c38c8bf5244bb970f969b1583365063e8b plugin-sdk-api-baseline.json
|
||||
06e70516047f98d78963c238f1671feb3eea7c7e559c6fa84f403b9562028bb2 plugin-sdk-api-baseline.jsonl
|
||||
|
||||
@@ -915,6 +915,11 @@ scripts/sandbox-browser-setup.sh # optional browser image
|
||||
fastModeDefault: false, // per-agent fast mode override
|
||||
embeddedHarness: { runtime: "auto", fallback: "pi" },
|
||||
params: { cacheRetention: "none" }, // overrides matching defaults.models params by key
|
||||
tts: {
|
||||
providers: {
|
||||
elevenlabs: { voiceId: "EXAVITQu4vr4xnSDxMaL" },
|
||||
},
|
||||
},
|
||||
skills: ["docs-search"], // replaces agents.defaults.skills when set
|
||||
identity: {
|
||||
name: "Samantha",
|
||||
@@ -950,6 +955,7 @@ scripts/sandbox-browser-setup.sh # optional browser image
|
||||
- `default`: when multiple are set, first wins (warning logged). If none set, first list entry is default.
|
||||
- `model`: string form overrides `primary` only; object form `{ primary, fallbacks }` overrides both (`[]` disables global fallbacks). Cron jobs that only override `primary` still inherit default fallbacks unless you set `fallbacks: []`.
|
||||
- `params`: per-agent stream params merged over the selected model entry in `agents.defaults.models`. Use this for agent-specific overrides like `cacheRetention`, `temperature`, or `maxTokens` without duplicating the whole model catalog.
|
||||
- `tts`: optional per-agent text-to-speech overrides. The block deep-merges over `messages.tts`, so keep shared provider credentials and fallback policy in `messages.tts` and set only persona-specific values such as provider, voice, model, style, or auto mode here.
|
||||
- `skills`: optional per-agent skill allowlist. If omitted, the agent inherits `agents.defaults.skills` when set; an explicit list replaces defaults instead of merging, and `[]` means no skills.
|
||||
- `thinkingDefault`: optional per-agent default thinking level (`off | minimal | low | medium | high | xhigh | adaptive | max`). Overrides `agents.defaults.thinkingDefault` for this agent when no per-message or session override is set. The selected provider/model profile controls which values are valid; for Google Gemini, `adaptive` keeps provider-owned dynamic thinking (`thinkingLevel` omitted on Gemini 3/3.1, `thinkingBudget: -1` on Gemini 2.5).
|
||||
- `reasoningDefault`: optional per-agent default reasoning visibility (`on | off | stream`). Applies when no per-message or session reasoning override is set.
|
||||
|
||||
@@ -35,6 +35,7 @@ Scope intent:
|
||||
- `models.providers.*.request.tls.passphrase`
|
||||
- `skills.entries.*.apiKey`
|
||||
- `agents.defaults.memorySearch.remote.apiKey`
|
||||
- `agents.list[].tts.providers.*.apiKey`
|
||||
- `agents.list[].memorySearch.remote.apiKey`
|
||||
- `talk.providers.*.apiKey`
|
||||
- `messages.tts.providers.*.apiKey`
|
||||
|
||||
@@ -29,6 +29,13 @@
|
||||
"secretShape": "secret_input",
|
||||
"optIn": true
|
||||
},
|
||||
{
|
||||
"id": "agents.list[].tts.providers.*.apiKey",
|
||||
"configFile": "openclaw.json",
|
||||
"path": "agents.list[].tts.providers.*.apiKey",
|
||||
"secretShape": "secret_input",
|
||||
"optIn": true
|
||||
},
|
||||
{
|
||||
"id": "auth-profiles.api_key.key",
|
||||
"configFile": "auth-profiles.json",
|
||||
|
||||
@@ -109,6 +109,50 @@ Full schema is in [Gateway configuration](/gateway/configuration).
|
||||
}
|
||||
```
|
||||
|
||||
### Per-agent voice overrides
|
||||
|
||||
Use `agents.list[].tts` when one agent should speak with a different provider,
|
||||
voice, model, style, or auto-TTS mode. The agent block deep-merges over
|
||||
`messages.tts`, so provider credentials can stay in the global provider config.
|
||||
|
||||
```json5
|
||||
{
|
||||
messages: {
|
||||
tts: {
|
||||
auto: "always",
|
||||
provider: "elevenlabs",
|
||||
providers: {
|
||||
elevenlabs: {
|
||||
apiKey: "${ELEVENLABS_API_KEY}",
|
||||
model: "eleven_multilingual_v2",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
agents: {
|
||||
list: [
|
||||
{
|
||||
id: "reader",
|
||||
tts: {
|
||||
providers: {
|
||||
elevenlabs: {
|
||||
voiceId: "EXAVITQu4vr4xnSDxMaL",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Precedence for automatic replies, from lowest to highest (each later entry overrides the ones before it):
|
||||
|
||||
1. `messages.tts`
|
||||
2. active `agents.list[].tts`
|
||||
3. local `/tts` preferences for this host
|
||||
4. inline `[[tts:...]]` directives when model overrides are enabled
|
||||
|
||||
### OpenAI primary with ElevenLabs fallback
|
||||
|
||||
```json5
|
||||
@@ -702,7 +746,8 @@ Stored fields:
|
||||
- `maxLength` (summary threshold; default 1500 chars)
|
||||
- `summarize` (default `true`)
|
||||
|
||||
These override `messages.tts.*` for that host.
|
||||
These override the effective config from `messages.tts` plus the active
|
||||
`agents.list[].tts` block for that host.
|
||||
|
||||
## Output formats (fixed)
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ vi.mock("../api.js", async () => {
|
||||
};
|
||||
});
|
||||
|
||||
const { _test, maybeApplyTtsToPayload } = await import("./tts.js");
|
||||
const { _test, maybeApplyTtsToPayload, resolveTtsConfig } = await import("./tts.js");
|
||||
|
||||
const nativeVoiceNoteChannels = ["discord", "feishu", "matrix", "telegram", "whatsapp"] as const;
|
||||
|
||||
@@ -158,3 +158,82 @@ describe("speech-core native voice-note routing", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("speech-core per-agent TTS config", () => {
  it("deep-merges the active agent TTS override over messages.tts", () => {
    // Global config carries the credential and defaults; the agent entry
    // only swaps the voice for the same provider.
    const config = {
      messages: {
        tts: {
          enabled: true,
          provider: "openai",
          providers: {
            openai: { apiKey: "${OPENAI_API_KEY}", voice: "coral", speed: 1 },
          },
        },
      },
      agents: {
        list: [
          {
            id: "reader",
            tts: {
              provider: "openai",
              providers: { openai: { voice: "nova" } },
            },
          },
        ],
      },
    } satisfies OpenClawConfig;

    // Everything from messages.tts survives except the overridden voice.
    const expectedRaw = {
      enabled: true,
      provider: "openai",
      providers: {
        openai: { apiKey: "${OPENAI_API_KEY}", voice: "nova", speed: 1 },
      },
    };

    const effective = resolveTtsConfig(config, "reader");

    expect(effective.rawConfig).toMatchObject(expectedRaw);
  });

  it("ignores prototype-pollution keys in agent TTS overrides", () => {
    // JSON.parse is used on purpose: it creates "__proto__" as an own
    // property, which an object literal would not.
    const hostileOverride = JSON.parse(
      '{"providers":{"openai":{"voice":"nova","__proto__":{"polluted":true}}}}',
    );
    const config = {
      messages: {
        tts: {
          provider: "openai",
          providers: { openai: { voice: "coral" } },
        },
      },
      agents: {
        list: [{ id: "reader", tts: hostileOverride }],
      },
    } as OpenClawConfig;

    const effective = resolveTtsConfig(config, "reader");

    // The blocked key must not appear in the merged provider config...
    expect(effective.rawConfig?.providers?.openai).toEqual({ voice: "nova" });
    // ...and a fresh object must not have picked up the injected property.
    const freshObject: Record<string, unknown> = {};
    expect(freshObject.polluted).toBeUndefined();
  });
});
|
||||
|
||||
@@ -62,6 +62,7 @@ const DEFAULT_TIMEOUT_MS = 30_000;
|
||||
const DEFAULT_TTS_MAX_LENGTH = 1500;
|
||||
const DEFAULT_TTS_SUMMARIZE = true;
|
||||
const DEFAULT_MAX_TEXT_LENGTH = 4096;
|
||||
// Property names that deepMergeDefined skips when copying keys from an
// override object, so config input cannot inject prototype-related keys.
const BLOCKED_MERGE_KEYS = new Set(["__proto__", "prototype", "constructor"]);
|
||||
|
||||
type TtsUserPrefs = {
|
||||
tts?: {
|
||||
@@ -240,6 +241,48 @@ function resolveRawProviderConfig(
|
||||
return asProviderConfig(direct);
|
||||
}
|
||||
|
||||
/**
 * Type guard for "object-like and not an array".
 *
 * Returns `true` for any non-null value whose `typeof` is `"object"` and
 * that is not an array. Note that this does not inspect the prototype, so
 * class instances also pass.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
||||
|
||||
/**
 * Recursively merges `override` onto `base` without mutating either input.
 *
 * - If `override` is not a plain object (primitive, array, `null`), it
 *   replaces `base` wholesale, except that `undefined` keeps `base`.
 * - If `override` is a plain object, its entries are merged key by key onto
 *   a shallow copy of `base` (or onto an empty object when `base` is not a
 *   plain object).
 * - Keys listed in `BLOCKED_MERGE_KEYS` and entries whose value is
 *   `undefined` are never copied.
 *
 * Every plain-object value coming from `override` is rebuilt through this
 * function, including subtrees that have no counterpart in `base`. That
 * keeps the blocked-key filter effective at every depth and means the result
 * never shares nested plain objects with `override`. Arrays are still passed
 * through by reference.
 *
 * @param base - Value providing defaults.
 * @param override - Value whose defined entries win over `base`.
 * @returns The merged value; a new object whenever `override` is a plain object.
 */
function deepMergeDefined(base: unknown, override: unknown): unknown {
  if (!isPlainObject(override)) {
    return override === undefined ? base : override;
  }

  // Start from a copy of base so the caller's object is left untouched.
  const result: Record<string, unknown> = isPlainObject(base) ? { ...base } : {};
  for (const [key, value] of Object.entries(override)) {
    if (BLOCKED_MERGE_KEYS.has(key) || value === undefined) {
      continue;
    }
    // Own-property check: inherited members such as `toString` must not be
    // treated as existing config values.
    const existing = Object.prototype.hasOwnProperty.call(result, key) ? result[key] : undefined;
    // Always recurse so override-only subtrees are filtered and copied too.
    result[key] = deepMergeDefined(existing, value);
  }
  return result;
}
|
||||
|
||||
/**
 * Normalizes an agent id for comparison by delegating to
 * `normalizeLowercaseStringOrEmpty`.
 *
 * @param value - Agent id from config or from a caller; may be absent.
 * @returns Whatever `normalizeLowercaseStringOrEmpty` returns for `value`.
 */
function normalizeAgentConfigId(value: string | undefined | null): string {
  const normalizedId = normalizeLowercaseStringOrEmpty(value);
  return normalizedId;
}
|
||||
|
||||
/**
 * Looks up the `tts` block of the agent entry matching `agentId`.
 *
 * Ids are compared after passing both sides through
 * `normalizeAgentConfigId`; the first matching entry in `cfg.agents.list`
 * wins.
 *
 * @param cfg - Full configuration object.
 * @param agentId - Id of the active agent, if any.
 * @returns The matching entry's `tts` value, or `undefined` when `agentId`
 *   is empty, `cfg.agents.list` is not an array, no entry matches, or the
 *   matching entry has no `tts`.
 */
function resolveAgentTtsOverride(
  cfg: OpenClawConfig,
  agentId: string | undefined,
): TtsConfig | undefined {
  const agentEntries = cfg.agents?.list;
  if (!agentId || !Array.isArray(agentEntries)) {
    return undefined;
  }

  const wantedId = normalizeAgentConfigId(agentId);
  for (const entry of agentEntries) {
    if (normalizeAgentConfigId(entry.id) === wantedId) {
      return entry.tts;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Builds the raw TTS config that applies to `agentId`.
 *
 * Takes `cfg.messages.tts` (or an empty object) as the base and merges the
 * agent's `tts` override from `resolveAgentTtsOverride` (or an empty object)
 * on top using `deepMergeDefined`.
 *
 * @param cfg - Full configuration object.
 * @param agentId - Id of the active agent; omit to use the global config only.
 * @returns The merged raw TTS config.
 */
function resolveEffectiveTtsRawConfig(cfg: OpenClawConfig, agentId?: string): TtsConfig {
  const globalTts = cfg.messages?.tts ?? {};
  const agentTts = resolveAgentTtsOverride(cfg, agentId) ?? {};
  const merged = deepMergeDefined(globalTts, agentTts);
  return merged as TtsConfig;
}
|
||||
|
||||
function resolveLazyProviderConfig(
|
||||
config: ResolvedTtsConfig,
|
||||
providerId: string,
|
||||
@@ -313,8 +356,8 @@ export function getResolvedSpeechProviderConfig(
|
||||
return resolveLazyProviderConfig(config, canonical, cfg);
|
||||
}
|
||||
|
||||
export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig {
|
||||
const raw: TtsConfig = cfg.messages?.tts ?? {};
|
||||
export function resolveTtsConfig(cfg: OpenClawConfig, agentId?: string): ResolvedTtsConfig {
|
||||
const raw: TtsConfig = resolveEffectiveTtsRawConfig(cfg, agentId);
|
||||
const providerSource = raw.provider ? "config" : "default";
|
||||
const timeoutMs = raw.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
||||
const auto = resolveConfiguredTtsAutoMode(raw);
|
||||
@@ -367,11 +410,15 @@ export function resolveTtsAutoMode(params: {
|
||||
return params.config.auto;
|
||||
}
|
||||
|
||||
function resolveEffectiveTtsAutoState(params: { cfg: OpenClawConfig; sessionAuto?: string }): {
|
||||
function resolveEffectiveTtsAutoState(params: {
|
||||
cfg: OpenClawConfig;
|
||||
sessionAuto?: string;
|
||||
agentId?: string;
|
||||
}): {
|
||||
autoMode: TtsAutoMode;
|
||||
prefsPath: string;
|
||||
} {
|
||||
const raw: TtsConfig = params.cfg.messages?.tts ?? {};
|
||||
const raw: TtsConfig = resolveEffectiveTtsRawConfig(params.cfg, params.agentId);
|
||||
const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath);
|
||||
const sessionAuto = normalizeTtsAutoMode(params.sessionAuto);
|
||||
if (sessionAuto) {
|
||||
@@ -387,12 +434,15 @@ function resolveEffectiveTtsAutoState(params: { cfg: OpenClawConfig; sessionAuto
|
||||
};
|
||||
}
|
||||
|
||||
export function buildTtsSystemPromptHint(cfg: OpenClawConfig): string | undefined {
|
||||
const { autoMode, prefsPath } = resolveEffectiveTtsAutoState({ cfg });
|
||||
export function buildTtsSystemPromptHint(
|
||||
cfg: OpenClawConfig,
|
||||
agentId?: string,
|
||||
): string | undefined {
|
||||
const { autoMode, prefsPath } = resolveEffectiveTtsAutoState({ cfg, agentId });
|
||||
if (autoMode === "off") {
|
||||
return undefined;
|
||||
}
|
||||
const _config = resolveTtsConfig(cfg);
|
||||
const _config = resolveTtsConfig(cfg, agentId);
|
||||
const maxLength = getTtsMaxLength(prefsPath);
|
||||
const summarize = isSummarizationEnabled(prefsPath) ? "on" : "off";
|
||||
const autoHint =
|
||||
@@ -504,11 +554,12 @@ export function resolveExplicitTtsOverrides(params: {
|
||||
provider?: string;
|
||||
modelId?: string;
|
||||
voiceId?: string;
|
||||
agentId?: string;
|
||||
}): TtsDirectiveOverrides {
|
||||
const providerInput = params.provider?.trim();
|
||||
const modelId = params.modelId?.trim();
|
||||
const voiceId = params.voiceId?.trim();
|
||||
const config = resolveTtsConfig(params.cfg);
|
||||
const config = resolveTtsConfig(params.cfg, params.agentId);
|
||||
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
|
||||
const selectedProvider =
|
||||
canonicalizeSpeechProviderId(providerInput, params.cfg) ??
|
||||
@@ -741,6 +792,7 @@ function resolveTtsRequestSetup(params: {
|
||||
prefsPath?: string;
|
||||
providerOverride?: TtsProvider;
|
||||
disableFallback?: boolean;
|
||||
agentId?: string;
|
||||
}):
|
||||
| {
|
||||
config: ResolvedTtsConfig;
|
||||
@@ -749,7 +801,7 @@ function resolveTtsRequestSetup(params: {
|
||||
| {
|
||||
error: string;
|
||||
} {
|
||||
const config = resolveTtsConfig(params.cfg);
|
||||
const config = resolveTtsConfig(params.cfg, params.agentId);
|
||||
const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config);
|
||||
if (params.text.length > config.maxTextLength) {
|
||||
return {
|
||||
@@ -774,6 +826,7 @@ export async function textToSpeech(params: {
|
||||
overrides?: TtsDirectiveOverrides;
|
||||
disableFallback?: boolean;
|
||||
timeoutMs?: number;
|
||||
agentId?: string;
|
||||
}): Promise<TtsResult> {
|
||||
const synthesis = await synthesizeSpeech(params);
|
||||
if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
|
||||
@@ -819,6 +872,7 @@ export async function synthesizeSpeech(params: {
|
||||
overrides?: TtsDirectiveOverrides;
|
||||
disableFallback?: boolean;
|
||||
timeoutMs?: number;
|
||||
agentId?: string;
|
||||
}): Promise<TtsSynthesisResult> {
|
||||
const setup = resolveTtsRequestSetup({
|
||||
text: params.text,
|
||||
@@ -826,6 +880,7 @@ export async function synthesizeSpeech(params: {
|
||||
prefsPath: params.prefsPath,
|
||||
providerOverride: params.overrides?.provider,
|
||||
disableFallback: params.disableFallback,
|
||||
agentId: params.agentId,
|
||||
});
|
||||
if ("error" in setup) {
|
||||
return { success: false, error: setup.error };
|
||||
@@ -1064,6 +1119,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
kind?: "tool" | "block" | "final";
|
||||
inboundAudio?: boolean;
|
||||
ttsAuto?: string;
|
||||
agentId?: string;
|
||||
}): Promise<ReplyPayload> {
|
||||
if (params.payload.isCompactionNotice) {
|
||||
return params.payload;
|
||||
@@ -1071,11 +1127,12 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
const { autoMode, prefsPath } = resolveEffectiveTtsAutoState({
|
||||
cfg: params.cfg,
|
||||
sessionAuto: params.ttsAuto,
|
||||
agentId: params.agentId,
|
||||
});
|
||||
if (autoMode === "off") {
|
||||
return params.payload;
|
||||
}
|
||||
const config = resolveTtsConfig(params.cfg);
|
||||
const config = resolveTtsConfig(params.cfg, params.agentId);
|
||||
const activeProvider = getTtsProvider(config, prefsPath);
|
||||
|
||||
const reply = resolveSendableOutboundReplyParts(params.payload);
|
||||
@@ -1183,6 +1240,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
prefsPath,
|
||||
channel: params.channel,
|
||||
overrides: directives.overrides,
|
||||
agentId: params.agentId,
|
||||
});
|
||||
|
||||
if (result.success && result.audioPath) {
|
||||
|
||||
@@ -25,6 +25,7 @@ export type ResolvedAgentConfig = {
|
||||
skills?: AgentEntry["skills"];
|
||||
memorySearch?: AgentEntry["memorySearch"];
|
||||
humanDelay?: AgentEntry["humanDelay"];
|
||||
tts?: AgentEntry["tts"];
|
||||
contextLimits?: AgentContextLimitsConfig;
|
||||
heartbeat?: AgentEntry["heartbeat"];
|
||||
identity?: AgentEntry["identity"];
|
||||
@@ -123,6 +124,7 @@ export function resolveAgentConfig(
|
||||
skills: Array.isArray(entry.skills) ? entry.skills : undefined,
|
||||
memorySearch: entry.memorySearch,
|
||||
humanDelay: entry.humanDelay,
|
||||
tts: entry.tts,
|
||||
contextLimits:
|
||||
typeof entry.contextLimits === "object" && entry.contextLimits
|
||||
? { ...agentDefaults?.contextLimits, ...entry.contextLimits }
|
||||
|
||||
@@ -65,6 +65,7 @@ describe("resolveAgentConfig", () => {
|
||||
groupChat: undefined,
|
||||
subagents: undefined,
|
||||
sandbox: undefined,
|
||||
tts: undefined,
|
||||
tools: undefined,
|
||||
});
|
||||
});
|
||||
|
||||
@@ -99,7 +99,9 @@ export function buildSystemPrompt(params: {
|
||||
shell: detectRuntimeShell(),
|
||||
},
|
||||
});
|
||||
const ttsHint = params.config ? buildTtsSystemPromptHint(params.config) : undefined;
|
||||
const ttsHint = params.config
|
||||
? buildTtsSystemPromptHint(params.config, params.agentId)
|
||||
: undefined;
|
||||
const ownerDisplay = resolveOwnerDisplaySetting(params.config);
|
||||
return buildAgentSystemPrompt({
|
||||
workspaceDir: params.workspaceDir,
|
||||
|
||||
@@ -722,7 +722,9 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
cwd: effectiveWorkspace,
|
||||
moduleUrl: import.meta.url,
|
||||
});
|
||||
const ttsHint = params.config ? buildTtsSystemPromptHint(params.config) : undefined;
|
||||
const ttsHint = params.config
|
||||
? buildTtsSystemPromptHint(params.config, sessionAgentId)
|
||||
: undefined;
|
||||
const ownerDisplay = resolveOwnerDisplaySetting(params.config);
|
||||
const promptContributionContext: Parameters<
|
||||
AgentRuntimePlan["prompt"]["resolveSystemPromptContribution"]
|
||||
|
||||
@@ -1065,7 +1065,9 @@ export async function runEmbeddedAttempt(
|
||||
cwd: effectiveWorkspace,
|
||||
moduleUrl: import.meta.url,
|
||||
});
|
||||
const ttsHint = params.config ? buildTtsSystemPromptHint(params.config) : undefined;
|
||||
const ttsHint = params.config
|
||||
? buildTtsSystemPromptHint(params.config, sessionAgentId)
|
||||
: undefined;
|
||||
const ownerDisplay = resolveOwnerDisplaySetting(params.config);
|
||||
const heartbeatPrompt = shouldInjectHeartbeatPrompt({
|
||||
config: params.config,
|
||||
|
||||
@@ -146,7 +146,7 @@ export async function resolveCommandsSystemPromptBundle(
|
||||
},
|
||||
}
|
||||
: { enabled: false };
|
||||
const ttsHint = params.cfg ? buildTtsSystemPromptHint(params.cfg) : undefined;
|
||||
const ttsHint = params.cfg ? buildTtsSystemPromptHint(params.cfg, sessionAgentId) : undefined;
|
||||
|
||||
const systemPrompt = buildAgentSystemPrompt({
|
||||
workspaceDir,
|
||||
|
||||
@@ -88,6 +88,7 @@ async function shouldTreatDeliveredTextAsVisible(params: {
|
||||
async function maybeApplyAcpTts(params: {
|
||||
payload: ReplyPayload;
|
||||
cfg: OpenClawConfig;
|
||||
agentId?: string;
|
||||
channel?: string;
|
||||
kind: ReplyDispatchKind;
|
||||
inboundAudio: boolean;
|
||||
@@ -100,6 +101,7 @@ async function maybeApplyAcpTts(params: {
|
||||
const ttsStatus = resolveStatusTtsSnapshot({
|
||||
cfg: params.cfg,
|
||||
sessionAuto: params.ttsAuto,
|
||||
agentId: params.agentId,
|
||||
});
|
||||
if (!ttsStatus) {
|
||||
return params.payload;
|
||||
@@ -107,7 +109,7 @@ async function maybeApplyAcpTts(params: {
|
||||
if (ttsStatus.autoMode === "inbound" && !params.inboundAudio) {
|
||||
return params.payload;
|
||||
}
|
||||
if (params.kind !== "final" && resolveConfiguredTtsMode(params.cfg) === "final") {
|
||||
if (params.kind !== "final" && resolveConfiguredTtsMode(params.cfg, params.agentId) === "final") {
|
||||
return params.payload;
|
||||
}
|
||||
const { maybeApplyTtsToPayload } = await loadDispatchAcpTtsRuntime();
|
||||
@@ -118,6 +120,7 @@ async function maybeApplyAcpTts(params: {
|
||||
kind: params.kind,
|
||||
inboundAudio: params.inboundAudio,
|
||||
ttsAuto: params.ttsAuto,
|
||||
agentId: params.agentId,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -153,6 +156,7 @@ export type AcpDispatchDeliveryCoordinator = {
|
||||
|
||||
export function createAcpDispatchDeliveryCoordinator(params: {
|
||||
cfg: OpenClawConfig;
|
||||
agentId?: string;
|
||||
ctx: FinalizedMsgContext;
|
||||
dispatcher: ReplyDispatcher;
|
||||
inboundAudio: boolean;
|
||||
@@ -294,6 +298,7 @@ export function createAcpDispatchDeliveryCoordinator(params: {
|
||||
const ttsPayload = await maybeApplyAcpTts({
|
||||
payload,
|
||||
cfg: params.cfg,
|
||||
agentId: params.agentId,
|
||||
channel: params.ttsChannel,
|
||||
kind,
|
||||
inboundAudio: params.inboundAudio,
|
||||
|
||||
@@ -186,6 +186,7 @@ async function maybeUnbindStaleBoundConversations(params: {
|
||||
async function finalizeAcpTurnOutput(params: {
|
||||
cfg: OpenClawConfig;
|
||||
sessionKey: string;
|
||||
agentId: string;
|
||||
delivery: AcpDispatchDeliveryCoordinator;
|
||||
inboundAudio: boolean;
|
||||
sessionTtsAuto?: TtsAutoMode;
|
||||
@@ -195,12 +196,13 @@ async function finalizeAcpTurnOutput(params: {
|
||||
await params.delivery.settleVisibleText();
|
||||
let queuedFinal =
|
||||
params.delivery.hasDeliveredVisibleText() && !params.delivery.hasFailedVisibleTextDelivery();
|
||||
const ttsMode = resolveConfiguredTtsMode(params.cfg);
|
||||
const ttsMode = resolveConfiguredTtsMode(params.cfg, params.agentId);
|
||||
const accumulatedBlockText = params.delivery.getAccumulatedBlockText();
|
||||
const hasAccumulatedBlockText = accumulatedBlockText.trim().length > 0;
|
||||
const ttsStatus = resolveStatusTtsSnapshot({
|
||||
cfg: params.cfg,
|
||||
sessionAuto: params.sessionTtsAuto,
|
||||
agentId: params.agentId,
|
||||
});
|
||||
const canAttemptFinalTts =
|
||||
ttsStatus != null && !(ttsStatus.autoMode === "inbound" && !params.inboundAudio);
|
||||
@@ -216,6 +218,7 @@ async function finalizeAcpTurnOutput(params: {
|
||||
kind: "final",
|
||||
inboundAudio: params.inboundAudio,
|
||||
ttsAuto: params.sessionTtsAuto,
|
||||
agentId: params.agentId,
|
||||
});
|
||||
if (ttsSyntheticReply.mediaUrl) {
|
||||
const delivered = await params.delivery.deliver("final", {
|
||||
@@ -308,10 +311,12 @@ export async function tryDispatchAcpReply(params: {
|
||||
return null;
|
||||
}
|
||||
const canonicalSessionKey = acpResolution.sessionKey;
|
||||
const acpAgentId = resolveAgentIdFromSessionKey(canonicalSessionKey);
|
||||
|
||||
let queuedFinal = false;
|
||||
const delivery = createAcpDispatchDeliveryCoordinator({
|
||||
cfg: params.cfg,
|
||||
agentId: acpAgentId,
|
||||
ctx: params.ctx,
|
||||
dispatcher: params.dispatcher,
|
||||
inboundAudio: params.inboundAudio,
|
||||
@@ -476,6 +481,7 @@ export async function tryDispatchAcpReply(params: {
|
||||
(await finalizeAcpTurnOutput({
|
||||
cfg: params.cfg,
|
||||
sessionKey: canonicalSessionKey,
|
||||
agentId: acpAgentId,
|
||||
delivery,
|
||||
inboundAudio: params.inboundAudio,
|
||||
sessionTtsAuto: params.sessionTtsAuto,
|
||||
|
||||
@@ -119,7 +119,9 @@ function loadReplyMediaPathsRuntime() {
|
||||
async function maybeApplyTtsToReplyPayload(
|
||||
params: Parameters<Awaited<ReturnType<typeof loadTtsRuntime>>["maybeApplyTtsToPayload"]>[0],
|
||||
) {
|
||||
if (!shouldAttemptTtsPayload({ cfg: params.cfg, ttsAuto: params.ttsAuto })) {
|
||||
if (
|
||||
!shouldAttemptTtsPayload({ cfg: params.cfg, ttsAuto: params.ttsAuto, agentId: params.agentId })
|
||||
) {
|
||||
return params.payload;
|
||||
}
|
||||
const { maybeApplyTtsToPayload } = await loadTtsRuntime();
|
||||
@@ -729,6 +731,7 @@ export async function dispatchReplyFromConfig(
|
||||
kind: "final",
|
||||
inboundAudio,
|
||||
ttsAuto: sessionTtsAuto,
|
||||
agentId: sessionAgentId,
|
||||
});
|
||||
const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload);
|
||||
const result = await routeReplyToOriginating(normalizedPayload);
|
||||
@@ -996,6 +999,7 @@ export async function dispatchReplyFromConfig(
|
||||
kind: "tool",
|
||||
inboundAudio,
|
||||
ttsAuto: sessionTtsAuto,
|
||||
agentId: sessionAgentId,
|
||||
});
|
||||
const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload);
|
||||
const deliveryPayload = resolveToolDeliveryPayload(normalizedPayload);
|
||||
@@ -1097,6 +1101,7 @@ export async function dispatchReplyFromConfig(
|
||||
kind: "block",
|
||||
inboundAudio,
|
||||
ttsAuto: sessionTtsAuto,
|
||||
agentId: sessionAgentId,
|
||||
});
|
||||
const normalizedPayload = await normalizeReplyMediaPayload(ttsPayload);
|
||||
if (shouldRouteToOriginating) {
|
||||
@@ -1167,7 +1172,7 @@ export async function dispatchReplyFromConfig(
|
||||
routedFinalCount += finalReply.routedFinalCount;
|
||||
}
|
||||
|
||||
const ttsMode = resolveConfiguredTtsMode(cfg);
|
||||
const ttsMode = resolveConfiguredTtsMode(cfg, sessionAgentId);
|
||||
// Generate TTS-only reply after block streaming completes (when there's no final reply).
|
||||
// This handles the case where block streaming succeeds and drops final payloads,
|
||||
// but we still want TTS audio to be generated from the accumulated block content.
|
||||
@@ -1185,6 +1190,7 @@ export async function dispatchReplyFromConfig(
|
||||
kind: "final",
|
||||
inboundAudio,
|
||||
ttsAuto: sessionTtsAuto,
|
||||
agentId: sessionAgentId,
|
||||
});
|
||||
// Only send if TTS was actually applied (mediaUrl exists)
|
||||
if (ttsSyntheticReply.mediaUrl) {
|
||||
|
||||
@@ -27,6 +27,7 @@ const STATIC_AGENT_RUNTIME_BASE_TARGET_IDS = [
|
||||
...STATIC_MODEL_TARGET_IDS,
|
||||
"agents.defaults.memorySearch.remote.apiKey",
|
||||
"agents.list[].memorySearch.remote.apiKey",
|
||||
"agents.list[].tts.providers.*.apiKey",
|
||||
"messages.tts.providers.*.apiKey",
|
||||
"skills.entries.*.apiKey",
|
||||
"tools.web.search.apiKey",
|
||||
|
||||
@@ -6531,6 +6531,177 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
tts: {
|
||||
type: "object",
|
||||
properties: {
|
||||
auto: {
|
||||
type: "string",
|
||||
enum: ["off", "always", "inbound", "tagged"],
|
||||
},
|
||||
enabled: {
|
||||
type: "boolean",
|
||||
},
|
||||
mode: {
|
||||
type: "string",
|
||||
enum: ["final", "all"],
|
||||
},
|
||||
provider: {
|
||||
type: "string",
|
||||
minLength: 1,
|
||||
},
|
||||
summaryModel: {
|
||||
type: "string",
|
||||
},
|
||||
modelOverrides: {
|
||||
type: "object",
|
||||
properties: {
|
||||
enabled: {
|
||||
type: "boolean",
|
||||
},
|
||||
allowText: {
|
||||
type: "boolean",
|
||||
},
|
||||
allowProvider: {
|
||||
type: "boolean",
|
||||
},
|
||||
allowVoice: {
|
||||
type: "boolean",
|
||||
},
|
||||
allowModelId: {
|
||||
type: "boolean",
|
||||
},
|
||||
allowVoiceSettings: {
|
||||
type: "boolean",
|
||||
},
|
||||
allowNormalization: {
|
||||
type: "boolean",
|
||||
},
|
||||
allowSeed: {
|
||||
type: "boolean",
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
providers: {
|
||||
type: "object",
|
||||
propertyNames: {
|
||||
type: "string",
|
||||
},
|
||||
additionalProperties: {
|
||||
type: "object",
|
||||
properties: {
|
||||
apiKey: {
|
||||
anyOf: [
|
||||
{
|
||||
type: "string",
|
||||
},
|
||||
{
|
||||
oneOf: [
|
||||
{
|
||||
type: "object",
|
||||
properties: {
|
||||
source: {
|
||||
type: "string",
|
||||
const: "env",
|
||||
},
|
||||
provider: {
|
||||
type: "string",
|
||||
pattern: "^[a-z][a-z0-9_-]{0,63}$",
|
||||
},
|
||||
id: {
|
||||
type: "string",
|
||||
pattern: "^[A-Z][A-Z0-9_]{0,127}$",
|
||||
},
|
||||
},
|
||||
required: ["source", "provider", "id"],
|
||||
additionalProperties: false,
|
||||
},
|
||||
{
|
||||
type: "object",
|
||||
properties: {
|
||||
source: {
|
||||
type: "string",
|
||||
const: "file",
|
||||
},
|
||||
provider: {
|
||||
type: "string",
|
||||
pattern: "^[a-z][a-z0-9_-]{0,63}$",
|
||||
},
|
||||
id: {
|
||||
type: "string",
|
||||
},
|
||||
},
|
||||
required: ["source", "provider", "id"],
|
||||
additionalProperties: false,
|
||||
},
|
||||
{
|
||||
type: "object",
|
||||
properties: {
|
||||
source: {
|
||||
type: "string",
|
||||
const: "exec",
|
||||
},
|
||||
provider: {
|
||||
type: "string",
|
||||
pattern: "^[a-z][a-z0-9_-]{0,63}$",
|
||||
},
|
||||
id: {
|
||||
type: "string",
|
||||
},
|
||||
},
|
||||
required: ["source", "provider", "id"],
|
||||
additionalProperties: false,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
additionalProperties: {
|
||||
anyOf: [
|
||||
{
|
||||
type: "string",
|
||||
},
|
||||
{
|
||||
type: "number",
|
||||
},
|
||||
{
|
||||
type: "boolean",
|
||||
},
|
||||
{
|
||||
type: "null",
|
||||
},
|
||||
{
|
||||
type: "array",
|
||||
items: {},
|
||||
},
|
||||
{
|
||||
type: "object",
|
||||
propertyNames: {
|
||||
type: "string",
|
||||
},
|
||||
additionalProperties: {},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
prefsPath: {
|
||||
type: "string",
|
||||
},
|
||||
maxTextLength: {
|
||||
type: "integer",
|
||||
minimum: 1,
|
||||
maximum: 9007199254740991,
|
||||
},
|
||||
timeoutMs: {
|
||||
type: "integer",
|
||||
minimum: 1000,
|
||||
maximum: 120000,
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
skillsLimits: {
|
||||
type: "object",
|
||||
properties: {
|
||||
@@ -27586,6 +27757,10 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
sensitive: true,
|
||||
tags: ["security", "auth"],
|
||||
},
|
||||
"agents.list[].tts.providers.*.apiKey": {
|
||||
sensitive: true,
|
||||
tags: ["security", "auth", "media"],
|
||||
},
|
||||
"agents.list[].sandbox.ssh.identityData": {
|
||||
sensitive: true,
|
||||
tags: ["security", "storage"],
|
||||
|
||||
@@ -13,6 +13,7 @@ import type { DmScope, HumanDelayConfig, IdentityConfig } from "./types.base.js"
|
||||
import type { GroupChatConfig } from "./types.messages.js";
|
||||
import type { SkillsLimitsConfig } from "./types.skills.js";
|
||||
import type { AgentToolsConfig, MemorySearchConfig } from "./types.tools.js";
|
||||
import type { TtsConfig } from "./types.tts.js";
|
||||
|
||||
export type AgentRuntimeAcpConfig = {
|
||||
/** ACP harness adapter id (for example codex, claude). */
|
||||
@@ -95,6 +96,8 @@ export type AgentConfig = {
|
||||
memorySearch?: MemorySearchConfig;
|
||||
/** Human-like delay between block replies for this agent. */
|
||||
humanDelay?: HumanDelayConfig;
|
||||
/** Optional per-agent TTS overrides, deep-merged over messages.tts. */
|
||||
tts?: TtsConfig;
|
||||
/** Optional per-agent skills subsystem overrides. */
|
||||
skillsLimits?: Pick<SkillsLimitsConfig, "maxSkillsPromptChars">;
|
||||
/** Optional per-agent overrides for selected context/token-heavy limits. */
|
||||
|
||||
@@ -140,6 +140,25 @@ describe("agent defaults schema", () => {
|
||||
expect(agent.heartbeat?.timeoutSeconds).toBe(45);
|
||||
});
|
||||
|
||||
it("accepts per-agent TTS overrides", () => {
  // An agent entry carrying a full tts block, including a provider credential.
  const readerEntry = {
    id: "reader",
    tts: {
      provider: "openai",
      auto: "always",
      providers: {
        openai: { voice: "nova", apiKey: "${OPENAI_API_KEY}" },
      },
    },
  };

  const parsed = AgentEntrySchema.parse(readerEntry);

  expect(parsed.tts?.provider).toBe("openai");
  expect(parsed.tts?.providers?.openai?.voice).toBe("nova");
});
|
||||
|
||||
it("rejects zero heartbeat timeoutSeconds", () => {
|
||||
expect(() => AgentDefaultsSchema.parse({ heartbeat: { timeoutSeconds: 0 } })).toThrow();
|
||||
expect(() => AgentEntrySchema.parse({ id: "ops", heartbeat: { timeoutSeconds: 0 } })).toThrow();
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
SecretInputSchema,
|
||||
ToolsLinksSchema,
|
||||
ToolsMediaSchema,
|
||||
TtsConfigSchema,
|
||||
} from "./zod-schema.core.js";
|
||||
import { sensitive } from "./zod-schema.sensitive.js";
|
||||
|
||||
@@ -828,6 +829,7 @@ export const AgentEntrySchema = z
|
||||
skills: z.array(z.string()).optional(),
|
||||
memorySearch: MemorySearchSchema,
|
||||
humanDelay: HumanDelaySchema.optional(),
|
||||
tts: TtsConfigSchema,
|
||||
skillsLimits: AgentSkillsLimitsSchema,
|
||||
contextLimits: AgentContextLimitsSchema,
|
||||
contextTokens: z.number().int().positive().optional(),
|
||||
|
||||
@@ -62,6 +62,7 @@ export type ResolveExplicitTtsOverridesParams = {
|
||||
provider?: string;
|
||||
modelId?: string;
|
||||
voiceId?: string;
|
||||
agentId?: string;
|
||||
};
|
||||
|
||||
export type TtsRequestParams = {
|
||||
@@ -72,6 +73,7 @@ export type TtsRequestParams = {
|
||||
overrides?: TtsDirectiveOverrides;
|
||||
disableFallback?: boolean;
|
||||
timeoutMs?: number;
|
||||
agentId?: string;
|
||||
};
|
||||
|
||||
export type TtsTelephonyRequestParams = {
|
||||
@@ -95,6 +97,7 @@ export type MaybeApplyTtsToPayloadParams = {
|
||||
kind?: "tool" | "block" | "final";
|
||||
inboundAudio?: boolean;
|
||||
ttsAuto?: string;
|
||||
agentId?: string;
|
||||
};
|
||||
|
||||
export type TtsTestFacade = {
|
||||
@@ -168,7 +171,7 @@ export type ListSpeechVoices = (params: ListSpeechVoicesParams) => Promise<Speec
|
||||
|
||||
export type TtsRuntimeFacade = {
|
||||
_test: TtsTestFacade;
|
||||
buildTtsSystemPromptHint: (cfg: OpenClawConfig) => string | undefined;
|
||||
buildTtsSystemPromptHint: (cfg: OpenClawConfig, agentId?: string) => string | undefined;
|
||||
getLastTtsAttempt: () => TtsStatusEntry | undefined;
|
||||
getResolvedSpeechProviderConfig: (
|
||||
config: ResolvedTtsConfig,
|
||||
@@ -188,7 +191,7 @@ export type TtsRuntimeFacade = {
|
||||
maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>;
|
||||
resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides;
|
||||
resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode;
|
||||
resolveTtsConfig: (cfg: OpenClawConfig) => ResolvedTtsConfig;
|
||||
resolveTtsConfig: (cfg: OpenClawConfig, agentId?: string) => ResolvedTtsConfig;
|
||||
resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string;
|
||||
resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[];
|
||||
setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void;
|
||||
|
||||
@@ -506,6 +506,29 @@ function collectMessagesTtsAssignments(params: {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Walks `config.agents.list` and forwards every per-agent `tts` override to
 * `collectTtsApiKeyAssignments`.
 *
 * Entries that are not records, or whose `tts` value is not a record, are
 * skipped. The path prefix handed to the collector is index based:
 * `agents.list.<index>.tts`.
 */
function collectAgentTtsAssignments(params: {
  config: OpenClawConfig;
  defaults: SecretDefaults | undefined;
  context: ResolverContext;
}): void {
  const { config, defaults, context } = params;
  const agentList = (config.agents as Record<string, unknown> | undefined)?.list;
  if (!Array.isArray(agentList)) {
    // No agent list configured: nothing to collect.
    return;
  }
  for (let index = 0; index < agentList.length; index += 1) {
    const candidate = agentList[index];
    if (isRecord(candidate) && isRecord(candidate.tts)) {
      collectTtsApiKeyAssignments({
        tts: candidate.tts,
        pathPrefix: `agents.list.${index}.tts`,
        defaults,
        context,
      });
    }
  }
}
|
||||
|
||||
function collectCronAssignments(params: {
|
||||
config: OpenClawConfig;
|
||||
defaults: SecretDefaults | undefined;
|
||||
@@ -640,6 +663,7 @@ export function collectCoreConfigAssignments(params: {
|
||||
collectGatewayAssignments(params);
|
||||
collectSandboxSshAssignments(params);
|
||||
collectMessagesTtsAssignments(params);
|
||||
collectAgentTtsAssignments(params);
|
||||
collectCronAssignments(params);
|
||||
collectMediaRequestAssignments(params);
|
||||
}
|
||||
|
||||
@@ -204,6 +204,18 @@ const CORE_SECRET_TARGET_REGISTRY: SecretTargetRegistryEntry[] = [
|
||||
includeInAudit: true,
|
||||
providerIdPathSegmentIndex: 3,
|
||||
},
|
||||
{
|
||||
id: "agents.list[].tts.providers.*.apiKey",
|
||||
targetType: "agents.list[].tts.providers.*.apiKey",
|
||||
configFile: "openclaw.json",
|
||||
pathPattern: "agents.list[].tts.providers.*.apiKey",
|
||||
secretShape: SECRET_INPUT_SHAPE,
|
||||
expectedResolvedValue: "string",
|
||||
includeInPlan: true,
|
||||
includeInConfigure: false,
|
||||
includeInAudit: true,
|
||||
providerIdPathSegmentIndex: 4,
|
||||
},
|
||||
{
|
||||
id: "models.providers.*.apiKey",
|
||||
targetType: "models.providers.apiKey",
|
||||
|
||||
@@ -451,6 +451,7 @@ const formatMediaUnderstandingLine = (decisions?: ReadonlyArray<MediaUnderstandi
|
||||
const formatVoiceModeLine = (
|
||||
config?: OpenClawConfig,
|
||||
sessionEntry?: SessionEntry,
|
||||
agentId?: string,
|
||||
): string | null => {
|
||||
if (!config) {
|
||||
return null;
|
||||
@@ -458,6 +459,7 @@ const formatVoiceModeLine = (
|
||||
const snapshot = resolveStatusTtsSnapshot({
|
||||
cfg: config,
|
||||
sessionAuto: sessionEntry?.ttsAuto,
|
||||
agentId,
|
||||
});
|
||||
if (!snapshot) {
|
||||
return null;
|
||||
@@ -890,7 +892,7 @@ export function buildStatusMessage(args: StatusArgs): string {
|
||||
const usageCostLine =
|
||||
usagePair && costLine ? `${usagePair} · ${costLine}` : (usagePair ?? costLine);
|
||||
const mediaLine = formatMediaUnderstandingLine(args.mediaDecisions);
|
||||
const voiceLine = formatVoiceModeLine(args.config, args.sessionEntry);
|
||||
const voiceLine = formatVoiceModeLine(args.config, args.sessionEntry, args.agentId);
|
||||
|
||||
return [
|
||||
versionLine,
|
||||
|
||||
@@ -104,6 +104,40 @@ describe("resolveStatusTtsSnapshot", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("reports per-agent TTS overrides", async () => {
|
||||
await withStatusTempHome(async () => {
|
||||
expect(
|
||||
resolveStatusTtsSnapshot({
|
||||
cfg: {
|
||||
messages: {
|
||||
tts: {
|
||||
auto: "off",
|
||||
provider: "openai",
|
||||
},
|
||||
},
|
||||
agents: {
|
||||
list: [
|
||||
{
|
||||
id: "reader",
|
||||
tts: {
|
||||
auto: "always",
|
||||
provider: "elevenlabs",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
agentId: "reader",
|
||||
}),
|
||||
).toEqual({
|
||||
autoMode: "always",
|
||||
provider: "elevenlabs",
|
||||
maxLength: 1500,
|
||||
summarize: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("derives the default prefs path from OPENCLAW_CONFIG_PATH when set", async () => {
|
||||
await withStatusTempHome(async (home) => {
|
||||
const stateDir = path.join(home, ".openclaw-dev");
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
} from "../shared/string-coerce.js";
|
||||
import { resolveConfigDir, resolveUserPath } from "../utils.js";
|
||||
import { normalizeTtsAutoMode } from "./tts-auto-mode.js";
|
||||
import { resolveEffectiveTtsConfig } from "./tts-config.js";
|
||||
|
||||
const DEFAULT_TTS_MAX_LENGTH = 1500;
|
||||
const DEFAULT_TTS_SUMMARIZE = true;
|
||||
@@ -80,8 +81,9 @@ function resolveTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefin
|
||||
export function resolveStatusTtsSnapshot(params: {
|
||||
cfg: OpenClawConfig;
|
||||
sessionAuto?: string;
|
||||
agentId?: string;
|
||||
}): TtsStatusSnapshot | null {
|
||||
const raw: TtsConfig = params.cfg.messages?.tts ?? {};
|
||||
const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, params.agentId);
|
||||
const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath);
|
||||
const prefs = readPrefs(prefsPath);
|
||||
const autoMode =
|
||||
|
||||
@@ -3,7 +3,7 @@ import { tmpdir } from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, beforeAll, afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { shouldAttemptTtsPayload } from "./tts-config.js";
|
||||
import { resolveConfiguredTtsMode, shouldAttemptTtsPayload } from "./tts-config.js";
|
||||
|
||||
describe("shouldAttemptTtsPayload", () => {
|
||||
let originalPrefsPath: string | undefined;
|
||||
@@ -61,4 +61,31 @@ describe("shouldAttemptTtsPayload", () => {
|
||||
shouldAttemptTtsPayload({ cfg: { messages: { tts: { enabled: true } } } as OpenClawConfig }),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("uses per-agent TTS auto and mode overrides", () => {
|
||||
const cfg = {
|
||||
messages: {
|
||||
tts: {
|
||||
auto: "off",
|
||||
mode: "final",
|
||||
},
|
||||
},
|
||||
agents: {
|
||||
list: [
|
||||
{
|
||||
id: "voice",
|
||||
tts: {
|
||||
auto: "always",
|
||||
mode: "all",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
expect(shouldAttemptTtsPayload({ cfg, agentId: "voice" })).toBe(true);
|
||||
expect(resolveConfiguredTtsMode(cfg, "voice")).toBe("all");
|
||||
expect(shouldAttemptTtsPayload({ cfg, agentId: "main" })).toBe(false);
|
||||
expect(resolveConfiguredTtsMode(cfg, "main")).toBe("final");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,13 +1,54 @@
|
||||
import { existsSync, readFileSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import type { TtsAutoMode, TtsMode } from "../config/types.tts.js";
|
||||
import type { TtsAutoMode, TtsConfig, TtsMode } from "../config/types.tts.js";
|
||||
import { normalizeAgentId } from "../routing/session-key.js";
|
||||
import { resolveConfigDir, resolveUserPath } from "../utils.js";
|
||||
import { normalizeTtsAutoMode } from "./tts-auto-mode.js";
|
||||
export { normalizeTtsAutoMode } from "./tts-auto-mode.js";
|
||||
|
||||
export function resolveConfiguredTtsMode(cfg: OpenClawConfig): TtsMode {
|
||||
return cfg.messages?.tts?.mode ?? "final";
|
||||
/** Keys that are never copied from an override, to avoid prototype tampering. */
const BLOCKED_MERGE_KEYS = new Set(["__proto__", "prototype", "constructor"]);

/** Returns true for any non-null, non-array object value. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return Boolean(value) && typeof value === "object" && !Array.isArray(value);
}

/**
 * Recursively layers `override` on top of `base` without mutating either.
 *
 * - `undefined` overrides (at any depth) leave the base value untouched.
 * - Non-object overrides (primitives, `null`, arrays) replace the base value.
 * - Object overrides are always rebuilt key by key, so blocked keys and
 *   `undefined` entries are stripped at every depth and the result never
 *   shares nested objects with the override, even when `base` has no matching
 *   subtree to merge into.
 *
 * @param base - Value supplying the defaults.
 * @param override - Value whose defined entries take precedence.
 * @returns The merged value; a fresh object whenever `override` is an object.
 */
function deepMergeDefined(base: unknown, override: unknown): unknown {
  if (override === undefined) {
    return base;
  }
  if (!isPlainObject(override)) {
    return override;
  }

  // Start from a shallow copy of the base when it is mergeable, otherwise from
  // an empty object so the override subtree is still filtered and copied.
  const result: Record<string, unknown> = isPlainObject(base) ? { ...base } : {};
  for (const [key, value] of Object.entries(override)) {
    if (BLOCKED_MERGE_KEYS.has(key) || value === undefined) {
      continue;
    }
    // Only own properties count as existing values; inherited members such as
    // `toString` must not be treated as something to merge into.
    const existing = Object.prototype.hasOwnProperty.call(result, key) ? result[key] : undefined;
    result[key] = deepMergeDefined(existing, value);
  }
  return result;
}
|
||||
|
||||
/**
 * Looks up the `tts` override of the agent whose normalized id matches
 * `agentId`.
 *
 * @returns The first matching entry's `tts` value, or `undefined` when no
 * agent id is given, no agent list is configured, or no entry matches.
 */
function resolveAgentTtsOverride(
  cfg: OpenClawConfig,
  agentId: string | undefined,
): TtsConfig | undefined {
  const agents = cfg.agents?.list;
  if (!agentId || !Array.isArray(agents)) {
    return undefined;
  }
  const wantedId = normalizeAgentId(agentId);
  for (const candidate of agents) {
    if (normalizeAgentId(candidate.id) === wantedId) {
      return candidate.tts;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Computes the TTS config that applies to an agent: the global
 * `messages.tts` settings with the agent's own `tts` override deep-merged on
 * top.
 *
 * @param cfg - Full configuration object.
 * @param agentId - Optional agent id; when omitted or unmatched only the
 * global settings are used.
 * @returns The merged TTS config (an empty object when nothing is configured).
 */
export function resolveEffectiveTtsConfig(cfg: OpenClawConfig, agentId?: string): TtsConfig {
  const globalTts = cfg.messages?.tts ?? {};
  const agentTts = resolveAgentTtsOverride(cfg, agentId) ?? {};
  return deepMergeDefined(globalTts, agentTts) as TtsConfig;
}
|
||||
|
||||
/**
 * Returns the TTS mode from the effective (global plus per-agent) TTS config,
 * falling back to `"final"` when no mode is set.
 */
export function resolveConfiguredTtsMode(cfg: OpenClawConfig, agentId?: string): TtsMode {
  const { mode } = resolveEffectiveTtsConfig(cfg, agentId);
  return mode ?? "final";
}
|
||||
|
||||
function resolveTtsPrefsPathValue(prefsPath: string | undefined): string {
|
||||
@@ -45,13 +86,14 @@ function readTtsPrefsAutoMode(prefsPath: string): TtsAutoMode | undefined {
|
||||
export function shouldAttemptTtsPayload(params: {
|
||||
cfg: OpenClawConfig;
|
||||
ttsAuto?: string;
|
||||
agentId?: string;
|
||||
}): boolean {
|
||||
const sessionAuto = normalizeTtsAutoMode(params.ttsAuto);
|
||||
if (sessionAuto) {
|
||||
return sessionAuto !== "off";
|
||||
}
|
||||
|
||||
const raw = params.cfg.messages?.tts;
|
||||
const raw = resolveEffectiveTtsConfig(params.cfg, params.agentId);
|
||||
const prefsAuto = readTtsPrefsAutoMode(resolveTtsPrefsPathValue(raw?.prefsPath));
|
||||
if (prefsAuto) {
|
||||
return prefsAuto !== "off";
|
||||
|
||||
Reference in New Issue
Block a user