From 8e4674b09332484962f2b7663f768e349bbee317 Mon Sep 17 00:00:00 2001
From: Quentin Fuxa
Date: Wed, 7 May 2025 10:55:12 +0200
Subject: [PATCH] End of transcription: properly send the stop signal back to
 the endpoint
---
whisperlivekit/web/live_transcription.html | 147 ++++++++++++---------
1 file changed, 87 insertions(+), 60 deletions(-)
diff --git a/whisperlivekit/web/live_transcription.html b/whisperlivekit/web/live_transcription.html
index d75dddd..21a5ea2 100644
--- a/whisperlivekit/web/live_transcription.html
+++ b/whisperlivekit/web/live_transcription.html
@@ -308,6 +308,7 @@
let waveCtx = waveCanvas.getContext("2d");
let animationFrame = null;
let waitingForStop = false;
+ let lastReceivedData = null;
waveCanvas.width = 60 * (window.devicePixelRatio || 1);
waveCanvas.height = 30 * (window.devicePixelRatio || 1);
waveCtx.scale(window.devicePixelRatio || 1, window.devicePixelRatio || 1);
@@ -357,18 +358,31 @@
websocket.onclose = () => {
if (userClosing) {
- if (!statusText.textContent.includes("Recording stopped. Processing final audio")) { // This is a bit of a hack. We should have a better way to handle this. eg. using a status code.
- statusText.textContent = "Finished processing audio! Ready to record again.";
+ if (waitingForStop) {
+ statusText.textContent = "Processing finalized or connection closed.";
+ if (lastReceivedData) {
+ renderLinesWithBuffer(
+ lastReceivedData.lines || [],
+ lastReceivedData.buffer_diarization || "",
+ lastReceivedData.buffer_transcription || "",
+ 0, 0, true // isFinalizing = true
+ );
+ }
}
- waitingForStop = false;
+ // If ready_to_stop was received, statusText is already "Finished processing..."
+ // and waitingForStop is false.
} else {
- statusText.textContent =
- "Disconnected from the WebSocket server. (Check logs if model is loading.)";
+ statusText.textContent = "Disconnected from the WebSocket server. (Check logs if model is loading.)";
if (isRecording) {
- stopRecording();
+ stopRecording();
}
}
- userClosing = false;
+ isRecording = false;
+ waitingForStop = false;
+ userClosing = false;
+ lastReceivedData = null;
+ websocket = null;
+ updateUI();
};
websocket.onerror = () => {
@@ -382,24 +396,31 @@
// Check for status messages
if (data.type === "ready_to_stop") {
- console.log("Ready to stop, closing WebSocket");
-
- // signal that we are not waiting for stop anymore
+ console.log("Ready to stop received, finalizing display and closing WebSocket.");
waitingForStop = false;
- recordButton.disabled = false; // this should be elsewhere
- console.log("Record button enabled");
- //Now we can close the WebSocket
- if (websocket) {
- websocket.close();
- websocket = null;
+ if (lastReceivedData) {
+ renderLinesWithBuffer(
+ lastReceivedData.lines || [],
+ lastReceivedData.buffer_diarization || "",
+ lastReceivedData.buffer_transcription || "",
+ 0, // No more lag
+ 0, // No more lag
+ true // isFinalizing = true
+ );
}
-
-
+ statusText.textContent = "Finished processing audio! Ready to record again.";
+ recordButton.disabled = false;
+ if (websocket) {
+ websocket.close(); // will trigger onclose
+ // websocket = null; // onclose handles setting websocket to null
+ }
return;
}
+ lastReceivedData = data;
+
// Handle normal transcription updates
const {
lines = [],
@@ -414,13 +435,14 @@
buffer_diarization,
buffer_transcription,
remaining_time_diarization,
- remaining_time_transcription
+ remaining_time_transcription,
+ false // isFinalizing = false for normal updates
);
};
});
}
- function renderLinesWithBuffer(lines, buffer_diarization, buffer_transcription, remaining_time_diarization, remaining_time_transcription) {
+ function renderLinesWithBuffer(lines, buffer_diarization, buffer_transcription, remaining_time_diarization, remaining_time_transcription, isFinalizing = false) {
const linesHtml = lines.map((item, idx) => {
let timeInfo = "";
if (item.beg !== undefined && item.end !== undefined) {
@@ -430,30 +452,46 @@
let speakerLabel = "";
if (item.speaker === -2) {
speakerLabel = `Silence${timeInfo}`;
- } else if (item.speaker == 0) {
+ } else if (item.speaker == 0 && !isFinalizing) {
speakerLabel = `${remaining_time_diarization} second(s) of audio are undergoing diarization`;
} else if (item.speaker == -1) {
- speakerLabel = `${timeInfo}`;
- } else if (item.speaker !== -1) {
+ speakerLabel = `Speaker 1${timeInfo}`;
+ } else if (item.speaker !== -1 && item.speaker !== 0) {
speakerLabel = `Speaker ${item.speaker}${timeInfo}`;
}
- let textContent = item.text;
- if (idx === lines.length - 1) {
- speakerLabel += `Transcription lag ${remaining_time_transcription}s`
- }
- if (idx === lines.length - 1 && buffer_diarization) {
- speakerLabel += `Diarization lag${remaining_time_diarization}s`
- textContent += `${buffer_diarization}`;
- }
- if (idx === lines.length - 1) {
- textContent += `${buffer_transcription}`;
+
+ let currentLineText = item.text || "";
+
+ if (idx === lines.length - 1) {
+ if (!isFinalizing) {
+ if (remaining_time_transcription > 0) {
+ speakerLabel += `Transcription lag ${remaining_time_transcription}s`;
+ }
+ if (buffer_diarization && remaining_time_diarization > 0) {
+ speakerLabel += `Diarization lag${remaining_time_diarization}s`;
+ }
+ }
+
+ if (buffer_diarization) {
+ if (isFinalizing) {
+ currentLineText += (currentLineText.length > 0 && buffer_diarization.trim().length > 0 ? " " : "") + buffer_diarization.trim();
+ } else {
+ currentLineText += `${buffer_diarization}`;
+ }
+ }
+ if (buffer_transcription) {
+ if (isFinalizing) {
+ currentLineText += (currentLineText.length > 0 && buffer_transcription.trim().length > 0 ? " " : "") + buffer_transcription.trim();
+ } else {
+ currentLineText += `${buffer_transcription}`;
+ }
+ }
}
-
- return textContent
- ? `${speakerLabel}
${textContent}
`
- : `${speakerLabel}
`;
+ return currentLineText.trim().length > 0 || speakerLabel.length > 0
+ ? `${speakerLabel}
${currentLineText}
`
+ : `${speakerLabel}
`;
}).join("");
linesTranscriptDiv.innerHTML = linesHtml;
@@ -578,20 +616,6 @@
timerElement.textContent = "00:00";
startTime = null;
- if (websocket && websocket.readyState === WebSocket.OPEN) {
- try {
- await websocket.send(JSON.stringify({
- type: "stop",
- message: "User stopped recording"
- }));
- statusText.textContent = "Recording stopped. Processing final audio...";
- } catch (e) {
- console.error("Could not send stop message:", e);
- statusText.textContent = "Recording stopped. Error during final audio processing.";
- websocket.close();
- websocket = null;
- }
- }
isRecording = false;
updateUI();
@@ -625,19 +649,22 @@
function updateUI() {
recordButton.classList.toggle("recording", isRecording);
-
+ recordButton.disabled = waitingForStop;
+
if (waitingForStop) {
- statusText.textContent = "Please wait for processing to complete...";
- recordButton.disabled = true; // Optionally disable the button while waiting
- console.log("Record button disabled");
+ if (statusText.textContent !== "Recording stopped. Processing final audio...") {
+ statusText.textContent = "Please wait for processing to complete...";
+ }
} else if (isRecording) {
statusText.textContent = "Recording...";
- recordButton.disabled = false;
- console.log("Record button enabled");
} else {
- statusText.textContent = "Click to start transcription";
+ if (statusText.textContent !== "Finished processing audio! Ready to record again." &&
+ statusText.textContent !== "Processing finalized or connection closed.") {
+ statusText.textContent = "Click to start transcription";
+ }
+ }
+ if (!waitingForStop) {
recordButton.disabled = false;
- console.log("Record button enabled");
}
}
@@ -645,4 +672,4 @@