mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-07 22:44:16 +00:00
fix(tts): make model provider overrides opt-in
This commit is contained in:
@@ -1342,6 +1342,7 @@ Batches rapid text-only messages from the same sender into a single agent turn.
|
||||
|
||||
- `auto` controls auto-TTS. `/tts off|always|inbound|tagged` overrides per session.
|
||||
- `summaryModel` overrides `agents.defaults.model.primary` for auto-summary.
|
||||
- `modelOverrides` is enabled by default; `modelOverrides.allowProvider` defaults to `false` (opt-in).
|
||||
- API keys fall back to `ELEVENLABS_API_KEY`/`XI_API_KEY` and `OPENAI_API_KEY`.
|
||||
|
||||
---
|
||||
|
||||
11
docs/tts.md
11
docs/tts.md
@@ -210,6 +210,7 @@ Then run:
|
||||
- `summaryModel`: optional cheap model for auto-summary; defaults to `agents.defaults.model.primary`.
|
||||
- Accepts `provider/model` or a configured model alias.
|
||||
- `modelOverrides`: allow the model to emit TTS directives (on by default).
|
||||
- `allowProvider` defaults to `false` (provider switching is opt-in).
|
||||
- `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded.
|
||||
- `timeoutMs`: request timeout (ms).
|
||||
- `prefsPath`: override the local prefs JSON path (provider/limit/summary).
|
||||
@@ -242,18 +243,20 @@ for a single reply, plus an optional `[[tts:text]]...[[/tts:text]]` block to
|
||||
provide expressive tags (laughter, singing cues, etc.) that should only appear in
|
||||
the audio.
|
||||
|
||||
`provider=...` directives are ignored unless `modelOverrides.allowProvider: true`.
|
||||
|
||||
Example reply payload:
|
||||
|
||||
```
|
||||
Here you go.
|
||||
|
||||
[[tts:provider=elevenlabs voiceId=pMsXgVXv3BLzUgSXRplE model=eleven_v3 speed=1.1]]
|
||||
[[tts:voiceId=pMsXgVXv3BLzUgSXRplE model=eleven_v3 speed=1.1]]
|
||||
[[tts:text]](laughs) Read the song once more.[[/tts:text]]
|
||||
```
|
||||
|
||||
Available directive keys (when enabled):
|
||||
|
||||
- `provider` (`openai` | `elevenlabs` | `edge`)
|
||||
- `provider` (`openai` | `elevenlabs` | `edge`, requires `allowProvider: true`)
|
||||
- `voice` (OpenAI voice) or `voiceId` (ElevenLabs)
|
||||
- `model` (OpenAI TTS model or ElevenLabs model id)
|
||||
- `stability`, `similarityBoost`, `style`, `speed`, `useSpeakerBoost`
|
||||
@@ -275,7 +278,7 @@ Disable all model overrides:
|
||||
}
|
||||
```
|
||||
|
||||
Optional allowlist (disable specific overrides while keeping tags enabled):
|
||||
Optional allowlist (enable provider switching and disable seed overrides; other knobs keep their defaults):
|
||||
|
||||
```json5
|
||||
{
|
||||
@@ -283,7 +286,7 @@ Optional allowlist (disable specific overrides while keeping tags enabled):
|
||||
tts: {
|
||||
modelOverrides: {
|
||||
enabled: true,
|
||||
allowProvider: false,
|
||||
allowProvider: true,
|
||||
allowSeed: false,
|
||||
},
|
||||
},
|
||||
|
||||
@@ -9,7 +9,7 @@ export type TtsModelOverrideConfig = {
|
||||
enabled?: boolean;
|
||||
/** Allow model-provided TTS text blocks. */
|
||||
allowText?: boolean;
|
||||
/** Allow model-provided provider override. */
|
||||
/** Allow model-provided provider override (default: false). */
|
||||
allowProvider?: boolean;
|
||||
/** Allow model-provided voice/voiceId override. */
|
||||
allowVoice?: boolean;
|
||||
|
||||
@@ -215,7 +215,7 @@ describe("tts", () => {
|
||||
|
||||
describe("parseTtsDirectives", () => {
|
||||
it("extracts overrides and strips directives when enabled", () => {
|
||||
const policy = resolveModelOverridePolicy({ enabled: true });
|
||||
const policy = resolveModelOverridePolicy({ enabled: true, allowProvider: true });
|
||||
const input =
|
||||
"Hello [[tts:provider=elevenlabs voiceId=pMsXgVXv3BLzUgSXRplE stability=0.4 speed=1.1]] world\n\n" +
|
||||
"[[tts:text]](laughs) Read the song once more.[[/tts:text]]";
|
||||
@@ -230,13 +230,22 @@ describe("tts", () => {
|
||||
});
|
||||
|
||||
it("accepts edge as provider override", () => {
|
||||
const policy = resolveModelOverridePolicy({ enabled: true });
|
||||
const policy = resolveModelOverridePolicy({ enabled: true, allowProvider: true });
|
||||
const input = "Hello [[tts:provider=edge]] world";
|
||||
const result = parseTtsDirectives(input, policy);
|
||||
|
||||
expect(result.overrides.provider).toBe("edge");
|
||||
});
|
||||
|
||||
it("rejects provider override by default while keeping voice overrides enabled", () => {
|
||||
const policy = resolveModelOverridePolicy({ enabled: true });
|
||||
const input = "Hello [[tts:provider=edge voice=alloy]] world";
|
||||
const result = parseTtsDirectives(input, policy);
|
||||
|
||||
expect(result.overrides.provider).toBeUndefined();
|
||||
expect(result.overrides.openai?.voice).toBe("alloy");
|
||||
});
|
||||
|
||||
it("keeps text intact when overrides are disabled", () => {
|
||||
const policy = resolveModelOverridePolicy({ enabled: false });
|
||||
const input = "Hello [[tts:voice=alloy]] world";
|
||||
|
||||
@@ -238,11 +238,12 @@ function resolveModelOverridePolicy(
|
||||
allowSeed: false,
|
||||
};
|
||||
}
|
||||
const allow = (value?: boolean) => value ?? true;
|
||||
const allow = (value: boolean | undefined, defaultValue = true) => value ?? defaultValue;
|
||||
return {
|
||||
enabled: true,
|
||||
allowText: allow(overrides?.allowText),
|
||||
allowProvider: allow(overrides?.allowProvider),
|
||||
// Provider switching is higher-impact than voice/style tweaks; keep opt-in.
|
||||
allowProvider: allow(overrides?.allowProvider, false),
|
||||
allowVoice: allow(overrides?.allowVoice),
|
||||
allowModelId: allow(overrides?.allowModelId),
|
||||
allowVoiceSettings: allow(overrides?.allowVoiceSettings),
|
||||
|
||||
Reference in New Issue
Block a user