From 8e4674b09332484962f2b7663f768e349bbee317 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Wed, 7 May 2025 10:55:12 +0200 Subject: [PATCH] End of transcription : Properly sends signal back to the endpoint --- whisperlivekit/web/live_transcription.html | 147 ++++++++++++--------- 1 file changed, 87 insertions(+), 60 deletions(-) diff --git a/whisperlivekit/web/live_transcription.html b/whisperlivekit/web/live_transcription.html index d75dddd..21a5ea2 100644 --- a/whisperlivekit/web/live_transcription.html +++ b/whisperlivekit/web/live_transcription.html @@ -308,6 +308,7 @@ let waveCtx = waveCanvas.getContext("2d"); let animationFrame = null; let waitingForStop = false; + let lastReceivedData = null; waveCanvas.width = 60 * (window.devicePixelRatio || 1); waveCanvas.height = 30 * (window.devicePixelRatio || 1); waveCtx.scale(window.devicePixelRatio || 1, window.devicePixelRatio || 1); @@ -357,18 +358,31 @@ websocket.onclose = () => { if (userClosing) { - if (!statusText.textContent.includes("Recording stopped. Processing final audio")) { // This is a bit of a hack. We should have a better way to handle this. eg. using a status code. - statusText.textContent = "Finished processing audio! Ready to record again."; + if (waitingForStop) { + statusText.textContent = "Processing finalized or connection closed."; + if (lastReceivedData) { + renderLinesWithBuffer( + lastReceivedData.lines || [], + lastReceivedData.buffer_diarization || "", + lastReceivedData.buffer_transcription || "", + 0, 0, true // isFinalizing = true + ); + } } - waitingForStop = false; + // If ready_to_stop was received, statusText is already "Finished processing..." + // and waitingForStop is false. } else { - statusText.textContent = - "Disconnected from the WebSocket server. (Check logs if model is loading.)"; + statusText.textContent = "Disconnected from the WebSocket server. (Check logs if model is loading.)"; if (isRecording) { - stopRecording(); + stopRecording(); } } - userClosing = false; + isRecording = false; + waitingForStop = false; + userClosing = false; + lastReceivedData = null; + websocket = null; + updateUI(); }; websocket.onerror = () => { @@ -382,24 +396,31 @@ // Check for status messages if (data.type === "ready_to_stop") { - console.log("Ready to stop, closing WebSocket"); - - // signal that we are not waiting for stop anymore + console.log("Ready to stop received, finalizing display and closing WebSocket."); waitingForStop = false; - recordButton.disabled = false; // this should be elsewhere - console.log("Record button enabled"); - //Now we can close the WebSocket - if (websocket) { - websocket.close(); - websocket = null; + if (lastReceivedData) { + renderLinesWithBuffer( + lastReceivedData.lines || [], + lastReceivedData.buffer_diarization || "", + lastReceivedData.buffer_transcription || "", + 0, // No more lag + 0, // No more lag + true // isFinalizing = true + ); } - - + statusText.textContent = "Finished processing audio! Ready to record again."; + recordButton.disabled = false; + if (websocket) { + websocket.close(); // will trigger onclose + // websocket = null; // onclose handle setting websocket to null + } return; } + lastReceivedData = data; + // Handle normal transcription updates const { lines = [], @@ -414,13 +435,14 @@ buffer_diarization, buffer_transcription, remaining_time_diarization, - remaining_time_transcription + remaining_time_transcription, + false // isFinalizing = false for normal updates ); }; }); } - function renderLinesWithBuffer(lines, buffer_diarization, buffer_transcription, remaining_time_diarization, remaining_time_transcription) { + function renderLinesWithBuffer(lines, buffer_diarization, buffer_transcription, remaining_time_diarization, remaining_time_transcription, isFinalizing = false) { const linesHtml = lines.map((item, idx) => { let timeInfo = ""; if (item.beg !== undefined && item.end !== undefined) { @@ -430,30 +452,46 @@ let speakerLabel = ""; if (item.speaker === -2) { speakerLabel = `Silence${timeInfo}`; - } else if (item.speaker == 0) { + } else if (item.speaker == 0 && !isFinalizing) { speakerLabel = `${remaining_time_diarization} second(s) of audio are undergoing diarization`; } else if (item.speaker == -1) { - speakerLabel = `${timeInfo}`; - } else if (item.speaker !== -1) { + speakerLabel = `Speaker 1${timeInfo}`; + } else if (item.speaker !== -1 && item.speaker !== 0) { speakerLabel = `Speaker ${item.speaker}${timeInfo}`; } - let textContent = item.text; - if (idx === lines.length - 1) { - speakerLabel += `Transcription lag ${remaining_time_transcription}s` - } - if (idx === lines.length - 1 && buffer_diarization) { - speakerLabel += `Diarization lag${remaining_time_diarization}s` - textContent += `${buffer_diarization}`; - } - if (idx === lines.length - 1) { - textContent += `${buffer_transcription}`; + + let currentLineText = item.text || ""; + + if (idx === lines.length - 1) { + if (!isFinalizing) { + if (remaining_time_transcription > 0) { + speakerLabel += `Transcription lag ${remaining_time_transcription}s`; + } + if (buffer_diarization && remaining_time_diarization > 0) { + speakerLabel += `Diarization lag${remaining_time_diarization}s`; + } + } + + if (buffer_diarization) { + if (isFinalizing) { + currentLineText += (currentLineText.length > 0 && buffer_diarization.trim().length > 0 ? " " : "") + buffer_diarization.trim(); + } else { + currentLineText += `${buffer_diarization}`; + } + } + if (buffer_transcription) { + if (isFinalizing) { + currentLineText += (currentLineText.length > 0 && buffer_transcription.trim().length > 0 ? " " : "") + buffer_transcription.trim(); + } else { + currentLineText += `${buffer_transcription}`; + } + } } - - return textContent - ? `

${speakerLabel}

${textContent}

` - : `

${speakerLabel}

`; + return currentLineText.trim().length > 0 || speakerLabel.length > 0 + ? `

${speakerLabel}

${currentLineText}

` + : `

${speakerLabel}

`; }).join(""); linesTranscriptDiv.innerHTML = linesHtml; @@ -578,20 +616,6 @@ timerElement.textContent = "00:00"; startTime = null; - if (websocket && websocket.readyState === WebSocket.OPEN) { - try { - await websocket.send(JSON.stringify({ - type: "stop", - message: "User stopped recording" - })); - statusText.textContent = "Recording stopped. Processing final audio..."; - } catch (e) { - console.error("Could not send stop message:", e); - statusText.textContent = "Recording stopped. Error during final audio processing."; - websocket.close(); - websocket = null; - } - } isRecording = false; updateUI(); @@ -625,19 +649,22 @@ function updateUI() { recordButton.classList.toggle("recording", isRecording); - + recordButton.disabled = waitingForStop; + if (waitingForStop) { - statusText.textContent = "Please wait for processing to complete..."; - recordButton.disabled = true; // Optionally disable the button while waiting - console.log("Record button disabled"); + if (statusText.textContent !== "Recording stopped. Processing final audio...") { + statusText.textContent = "Please wait for processing to complete..."; + } } else if (isRecording) { statusText.textContent = "Recording..."; - recordButton.disabled = false; - console.log("Record button enabled"); } else { - statusText.textContent = "Click to start transcription"; + if (statusText.textContent !== "Finished processing audio! Ready to record again." && + statusText.textContent !== "Processing finalized or connection closed.") { + statusText.textContent = "Click to start transcription"; + } + } + if (!waitingForStop) { recordButton.disabled = false; - console.log("Record button enabled"); } } @@ -645,4 +672,4 @@ - \ No newline at end of file +