fix(tts): make model provider overrides opt-in

This commit is contained in:
Peter Steinberger
2026-02-21 13:15:53 +01:00
parent d25a106628
commit f265d45840
5 changed files with 23 additions and 9 deletions

View File

@@ -1342,6 +1342,7 @@ Batches rapid text-only messages from the same sender into a single agent turn.
- `auto` controls auto-TTS. `/tts off|always|inbound|tagged` overrides per session.
- `summaryModel` overrides `agents.defaults.model.primary` for auto-summary.
- `modelOverrides` is enabled by default; `modelOverrides.allowProvider` defaults to `false` (opt-in).
- API keys fall back to `ELEVENLABS_API_KEY`/`XI_API_KEY` and `OPENAI_API_KEY`.
---

View File

@@ -210,6 +210,7 @@ Then run:
- `summaryModel`: optional cheap model for auto-summary; defaults to `agents.defaults.model.primary`.
- Accepts `provider/model` or a configured model alias.
- `modelOverrides`: allow the model to emit TTS directives (on by default).
- `allowProvider` defaults to `false` (provider switching is opt-in).
- `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded.
- `timeoutMs`: request timeout (ms).
- `prefsPath`: override the local prefs JSON path (provider/limit/summary).
@@ -242,18 +243,20 @@ for a single reply, plus an optional `[[tts:text]]...[[/tts:text]]` block to
provide expressive tags (laughter, singing cues, etc.) that should only appear in
the audio.
`provider=...` directives are ignored unless `modelOverrides.allowProvider` is set to `true`.
Example reply payload:
```
Here you go.
[[tts:provider=elevenlabs voiceId=pMsXgVXv3BLzUgSXRplE model=eleven_v3 speed=1.1]]
[[tts:voiceId=pMsXgVXv3BLzUgSXRplE model=eleven_v3 speed=1.1]]
[[tts:text]](laughs) Read the song once more.[[/tts:text]]
```
Available directive keys (when enabled):
- `provider` (`openai` | `elevenlabs` | `edge`)
- `provider` (`openai` | `elevenlabs` | `edge`, requires `allowProvider: true`)
- `voice` (OpenAI voice) or `voiceId` (ElevenLabs)
- `model` (OpenAI TTS model or ElevenLabs model id)
- `stability`, `similarityBoost`, `style`, `speed`, `useSpeakerBoost`
@@ -275,7 +278,7 @@ Disable all model overrides:
}
```
Optional allowlist (disable specific overrides while keeping tags enabled):
Optional allowlist (enable provider switching while keeping other knobs configurable):
```json5
{
@@ -283,7 +286,7 @@ Optional allowlist (disable specific overrides while keeping tags enabled):
tts: {
modelOverrides: {
enabled: true,
allowProvider: false,
allowProvider: true,
allowSeed: false,
},
},

View File

@@ -9,7 +9,7 @@ export type TtsModelOverrideConfig = {
enabled?: boolean;
/** Allow model-provided TTS text blocks. */
allowText?: boolean;
/** Allow model-provided provider override. */
/** Allow model-provided provider override (default: false). */
allowProvider?: boolean;
/** Allow model-provided voice/voiceId override. */
allowVoice?: boolean;

View File

@@ -215,7 +215,7 @@ describe("tts", () => {
describe("parseTtsDirectives", () => {
it("extracts overrides and strips directives when enabled", () => {
const policy = resolveModelOverridePolicy({ enabled: true });
const policy = resolveModelOverridePolicy({ enabled: true, allowProvider: true });
const input =
"Hello [[tts:provider=elevenlabs voiceId=pMsXgVXv3BLzUgSXRplE stability=0.4 speed=1.1]] world\n\n" +
"[[tts:text]](laughs) Read the song once more.[[/tts:text]]";
@@ -230,13 +230,22 @@ describe("tts", () => {
});
it("accepts edge as provider override", () => {
const policy = resolveModelOverridePolicy({ enabled: true });
const policy = resolveModelOverridePolicy({ enabled: true, allowProvider: true });
const input = "Hello [[tts:provider=edge]] world";
const result = parseTtsDirectives(input, policy);
expect(result.overrides.provider).toBe("edge");
});
// With only `enabled: true` passed, `allowProvider` is left unspecified, so this
// case exercises how the resolved policy treats provider directives by default.
it("rejects provider override by default while keeping voice overrides enabled", () => {
const policy = resolveModelOverridePolicy({ enabled: true });
// A single directive carrying both a provider switch and a voice override.
const input = "Hello [[tts:provider=edge voice=alloy]] world";
const result = parseTtsDirectives(input, policy);
// The provider key must be dropped while the voice key from the same directive is kept.
expect(result.overrides.provider).toBeUndefined();
expect(result.overrides.openai?.voice).toBe("alloy");
});
it("keeps text intact when overrides are disabled", () => {
const policy = resolveModelOverridePolicy({ enabled: false });
const input = "Hello [[tts:voice=alloy]] world";

View File

@@ -238,11 +238,12 @@ function resolveModelOverridePolicy(
allowSeed: false,
};
}
const allow = (value?: boolean) => value ?? true;
const allow = (value: boolean | undefined, defaultValue = true) => value ?? defaultValue;
return {
enabled: true,
allowText: allow(overrides?.allowText),
allowProvider: allow(overrides?.allowProvider),
// Provider switching is higher-impact than voice/style tweaks; keep opt-in.
allowProvider: allow(overrides?.allowProvider, false),
allowVoice: allow(overrides?.allowVoice),
allowModelId: allow(overrides?.allowModelId),
allowVoiceSettings: allow(overrides?.allowVoiceSettings),