diff --git a/extensions/voice-call/src/providers/twilio.ts b/extensions/voice-call/src/providers/twilio.ts
index 245c5e2bc3b..43af8e299a3 100644
--- a/extensions/voice-call/src/providers/twilio.ts
+++ b/extensions/voice-call/src/providers/twilio.ts
@@ -62,6 +62,28 @@ export class TwilioProvider implements VoiceCallProvider {
 
   /** Map of call SID to stream SID for media streams */
   private callStreamMap = new Map();
+  /** Pre-generated greeting audio for instant inbound playback */
+  private cachedGreetingAudio: Buffer | null = null;
+
+  /**
+   * Store pre-synthesized greeting audio for later playback.
+   * An empty buffer clears the cache, so readers see `null` rather than
+   * a truthy zero-length buffer that would play as silence.
+   */
+  setCachedGreetingAudio(audio: Buffer): void {
+    if (audio.length === 0) {
+      this.cachedGreetingAudio = null;
+      console.warn("[voice-call] Ignoring empty greeting audio; cache cleared");
+      return;
+    }
+    this.cachedGreetingAudio = audio;
+    console.log(`[voice-call] Cached greeting audio: ${audio.length} bytes`);
+  }
+
+  /** Return the cached greeting audio, or null when none has been cached. */
+  getCachedGreetingAudio(): Buffer | null {
+    return this.cachedGreetingAudio;
+  }
 
   /** Per-call tokens for media stream authentication */
   private streamAuthTokens = new Map();
diff --git a/extensions/voice-call/src/runtime.ts b/extensions/voice-call/src/runtime.ts
index 811a9074037..72365b9ccb5 100644
--- a/extensions/voice-call/src/runtime.ts
+++ b/extensions/voice-call/src/runtime.ts
@@ -187,6 +187,35 @@ export async function createVoiceCallRuntime(params: {
       twilioProvider.setMediaStreamHandler(mediaHandler);
       log.info("[voice-call] Media stream handler wired to provider");
     }
+
+    // Pre-cache inbound greeting TTS for instant playback on connect
+    if (config.inboundGreeting && ttsRuntime?.textToSpeechTelephony) {
+      try {
+        const greetingTts = createTelephonyTtsProvider({
+          coreConfig,
+          ttsOverride: config.tts,
+          runtime: ttsRuntime,
+        });
+        greetingTts
+          .synthesizeForTelephony(config.inboundGreeting)
+          .then((audio) => {
+            twilioProvider.setCachedGreetingAudio(audio);
+          })
+          .catch((err) => {
+            log.warn(
+              `[voice-call] Failed to pre-cache greeting: ${
+                err instanceof Error ? err.message : String(err)
+              }`,
+            );
+          });
+      } catch (err) {
+        log.warn(
+          `[voice-call] Failed to init greeting TTS: ${
+            err instanceof Error ? err.message : String(err)
+          }`,
+        );
+      }
+    }
   }
 
   manager.initialize(provider, webhookUrl);
diff --git a/extensions/voice-call/src/webhook.ts b/extensions/voice-call/src/webhook.ts
index 79ecc843cd4..08f417b92d0 100644
--- a/extensions/voice-call/src/webhook.ts
+++ b/extensions/voice-call/src/webhook.ts
@@ -141,13 +141,62 @@ export class VoiceCallWebhookServer {
           (this.provider as TwilioProvider).registerCallStream(callId, streamSid);
         }
 
-        // Speak initial message if one was provided when call was initiated
-        // Use setTimeout to allow stream setup to complete
-        setTimeout(() => {
-          this.manager.speakInitialMessage(callId).catch((err) => {
-            console.warn(`[voice-call] Failed to speak initial message:`, err);
-          });
-        }, 500);
+        // Try instant cached greeting for inbound calls (pre-generated at startup)
+        const cachedAudio =
+          this.provider.name === "twilio"
+            ? (this.provider as TwilioProvider).getCachedGreetingAudio()
+            : null;
+        const call = this.manager.getCallByProviderCallId(callId);
+        const handler = this.mediaStreamHandler;
+        const metadata = call?.metadata;
+        const initialMessage = metadata?.initialMessage;
+        // NOTE(review): this plays the cached audio for any inbound call that has an
+        // initialMessage; confirm that message always matches the text the cache was
+        // synthesized from.
+        if (
+          cachedAudio &&
+          handler &&
+          metadata &&
+          initialMessage &&
+          call?.direction === "inbound"
+        ) {
+          console.log(`[voice-call] Playing cached greeting (${cachedAudio.length} bytes)`);
+          delete metadata.initialMessage; // prevent re-speaking via fallback
+          // presumably 160 bytes per 20 ms matches the stream's audio format — TODO confirm
+          const CHUNK_SIZE = 160;
+          const CHUNK_DELAY_MS = 20;
+          let sentAudio = false;
+          void (async () => {
+            const { chunkAudio } = await import("./telephony-audio.js");
+            await handler.queueTts(streamSid, async (signal) => {
+              for (const chunk of chunkAudio(cachedAudio, CHUNK_SIZE)) {
+                if (signal.aborted) break;
+                handler.sendAudio(streamSid, chunk);
+                sentAudio = true;
+                await new Promise((r) => setTimeout(r, CHUNK_DELAY_MS));
+              }
+              if (!signal.aborted) {
+                handler.sendMark(streamSid, `greeting-${Date.now()}`);
+              }
+            });
+          })().catch((err) => {
+            console.warn("[voice-call] Cached greeting playback failed:", err);
+            if (sentAudio) return;
+            // Nothing was sent: restore the message and fall back to live TTS
+            // so the caller is not left without a greeting.
+            metadata.initialMessage = initialMessage;
+            this.manager.speakInitialMessage(callId).catch((fallbackErr) => {
+              console.warn(`[voice-call] Failed to speak initial message:`, fallbackErr);
+            });
+          });
+        } else {
+          // Fallback: original path with reduced delay
+          setTimeout(() => {
+            this.manager.speakInitialMessage(callId).catch((err) => {
+              console.warn(`[voice-call] Failed to speak initial message:`, err);
+            });
+          }, 100);
+        }
       },
       onDisconnect: (callId) => {
         console.log(`[voice-call] Media stream disconnected: ${callId}`);