From d089c7fce04421711fadbd145cbc92e46bd1163a Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Wed, 20 Aug 2025 20:00:31 +0200 Subject: [PATCH] .html to .html + .css + .js --- pyproject.toml | 4 +- whisperlivekit/basic_server.py | 5 + whisperlivekit/web/live_transcription.css | 388 +++++++++ whisperlivekit/web/live_transcription.html | 897 ++------------------- whisperlivekit/web/live_transcription.js | 515 ++++++++++++ whisperlivekit/web/src/dark_mode.svg | 1 + whisperlivekit/web/src/light_mode.svg | 1 + whisperlivekit/web/src/system_mode.svg | 1 + whisperlivekit/web/web_interface.py | 22 +- 9 files changed, 982 insertions(+), 852 deletions(-) create mode 100644 whisperlivekit/web/live_transcription.css create mode 100644 whisperlivekit/web/live_transcription.js create mode 100644 whisperlivekit/web/src/dark_mode.svg create mode 100644 whisperlivekit/web/src/light_mode.svg create mode 100644 whisperlivekit/web/src/system_mode.svg diff --git a/pyproject.toml b/pyproject.toml index e1d4140..59903a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "whisperlivekit" -version = "0.2.5" +version = "0.2.5.post1" description = "Real-time, Fully Local Whisper's Speech-to-Text and Speaker Diarization" readme = "README.md" authors = [ @@ -52,5 +52,5 @@ whisperlivekit-server = "whisperlivekit.basic_server:main" packages = ["whisperlivekit", "whisperlivekit.diarization", "whisperlivekit.simul_whisper", "whisperlivekit.simul_whisper.whisper", "whisperlivekit.simul_whisper.whisper.assets", "whisperlivekit.simul_whisper.whisper.normalizers", "whisperlivekit.web", "whisperlivekit.whisper_streaming_custom"] [tool.setuptools.package-data] -whisperlivekit = ["web/*.html"] +whisperlivekit = ["web/*.html", "web/*.css", "web/*.js", "web/src/*.svg"] "whisperlivekit.simul_whisper.whisper.assets" = ["*.tiktoken", "*.npz"] diff --git a/whisperlivekit/basic_server.py b/whisperlivekit/basic_server.py index 9ce0a1e..b49af59 100644 --- a/whisperlivekit/basic_server.py +++ b/whisperlivekit/basic_server.py @@ -5,6 +5,9 @@ from fastapi.middleware.cors import CORSMiddleware from whisperlivekit import TranscriptionEngine, AudioProcessor, get_web_interface_html, parse_args import asyncio import logging +from starlette.staticfiles import StaticFiles +import pathlib +import whisperlivekit.web as webpkg logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") logging.getLogger().setLevel(logging.WARNING) @@ -30,6 +33,8 @@ app.add_middleware( allow_methods=["*"], allow_headers=["*"], ) +web_dir = pathlib.Path(webpkg.__file__).parent +app.mount("/web", StaticFiles(directory=str(web_dir)), name="web") @app.get("/") async def get(): diff --git a/whisperlivekit/web/live_transcription.css b/whisperlivekit/web/live_transcription.css new file mode 100644 index 0000000..be5e8b6 --- /dev/null +++ b/whisperlivekit/web/live_transcription.css @@ -0,0 +1,388 @@ +:root { + --bg: #ffffff; + --text: #111111; + --muted: #666666; + --border: #e5e5e5; + --chip-bg: rgba(0, 0, 0, 0.04); + --chip-text: #000000; + --spinner-border: #8d8d8d5c; + --spinner-top: #b0b0b0; + --silence-bg: #f3f3f3; + --loading-bg: rgba(255, 77, 77, 0.06); + --button-bg: #ffffff; + --button-border: #e9e9e9; + --wave-stroke: #000000; + --label-dia-text: #868686; + --label-trans-text: #111111; +} + +@media (prefers-color-scheme: dark) { + :root:not([data-theme="light"]) { + --bg: #0b0b0b; + --text: #e6e6e6; + --muted: #9aa0a6; + --border: #333333; + --chip-bg: rgba(255, 255, 255, 0.08); + --chip-text: #e6e6e6; + --spinner-border: #555555; + --spinner-top: #dddddd; + --silence-bg: #1a1a1a; + --loading-bg: rgba(255, 77, 77, 0.12); + --button-bg: #111111; + --button-border: #333333; + --wave-stroke: #e6e6e6; + --label-dia-text: #b3b3b3; + --label-trans-text: #ffffff; + } +} + +:root[data-theme="dark"] { + --bg: #0b0b0b; + --text: #e6e6e6; + --muted: #9aa0a6; + --border: #333333; + --chip-bg: rgba(255, 255, 255, 0.08); + --chip-text: #e6e6e6; + --spinner-border: #555555; + --spinner-top: #dddddd; + --silence-bg: #1a1a1a; + --loading-bg: rgba(255, 77, 77, 0.12); + --button-bg: #111111; + --button-border: #333333; + --wave-stroke: #e6e6e6; + --label-dia-text: #b3b3b3; + --label-trans-text: #ffffff; +} + +:root[data-theme="light"] { + --bg: #ffffff; + --text: #111111; + --muted: #666666; + --border: #e5e5e5; + --chip-bg: rgba(0, 0, 0, 0.04); + --chip-text: #000000; + --spinner-border: #8d8d8d5c; + --spinner-top: #b0b0b0; + --silence-bg: #f3f3f3; + --loading-bg: rgba(255, 77, 77, 0.06); + --button-bg: #ffffff; + --button-border: #e9e9e9; + --wave-stroke: #000000; + --label-dia-text: #868686; + --label-trans-text: #111111; +} + +body { + font-family: ui-sans-serif, system-ui, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol', 'Noto Color Emoji'; + margin: 20px; + text-align: center; + background-color: var(--bg); + color: var(--text); +} + +/* Record button */ +#recordButton { + width: 50px; + height: 50px; + border: none; + border-radius: 50%; + background-color: var(--button-bg); + cursor: pointer; + transition: all 0.3s ease; + border: 1px solid var(--button-border); + display: flex; + align-items: center; + justify-content: center; + position: relative; +} + +#recordButton.recording { + width: 180px; + border-radius: 40px; + justify-content: flex-start; + padding-left: 20px; +} + +#recordButton:active { + transform: scale(0.95); +} + +.shape-container { + width: 25px; + height: 25px; + display: flex; + align-items: center; + justify-content: center; + flex-shrink: 0; +} + +.shape { + width: 25px; + height: 25px; + background-color: rgb(209, 61, 53); + border-radius: 50%; + transition: all 0.3s ease; +} + +#recordButton:disabled .shape { + background-color: #6e6d6d; +} + +#recordButton.recording .shape { + border-radius: 5px; + width: 25px; + height: 25px; +} + +/* Recording elements */ +.recording-info { + display: none; + align-items: center; + margin-left: 15px; + flex-grow: 1; +} + +#recordButton.recording .recording-info { + display: flex; +} + +.wave-container { + width: 60px; + height: 30px; + position: relative; + display: flex; + align-items: center; + justify-content: center; +} + +#waveCanvas { + width: 100%; + height: 100%; +} + +.timer { + font-size: 14px; + font-weight: 500; + color: var(--text); + margin-left: 10px; +} + +#status { + margin-top: 20px; + font-size: 16px; + color: var(--text); +} + +/* Settings */ +.settings-container { + display: flex; + justify-content: center; + align-items: center; + gap: 15px; + margin-top: 20px; +} + +.settings { + display: flex; + flex-direction: column; + align-items: flex-start; + gap: 12px; +} + +.field { + display: flex; + flex-direction: column; + align-items: flex-start; + gap: 6px; +} + +#chunkSelector, +#websocketInput, +#themeSelector { + font-size: 16px; + padding: 5px 8px; + border-radius: 8px; + border: 1px solid var(--border); + background-color: var(--button-bg); + color: var(--text); + max-height: 34px; +} + +#websocketInput { + width: 220px; +} + +#chunkSelector:focus, +#websocketInput:focus, +#themeSelector:focus { + outline: none; + border-color: #007bff; + box-shadow: 0 0 0 3px rgba(0, 123, 255, 0.15); +} + +label { + font-size: 13px; + color: var(--muted); +} + +.ws-default { + font-size: 12px; + color: var(--muted); +} + +/* Segmented pill control for Theme */ +.segmented { + display: inline-flex; + align-items: stretch; + border: 1px solid var(--button-border); + background-color: var(--button-bg); + border-radius: 999px; + overflow: hidden; +} + +.segmented input[type="radio"] { + position: absolute; + opacity: 0; + pointer-events: none; +} + +.segmented label { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 6px 12px; + font-size: 14px; + color: var(--muted); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease, color 0.2s ease; +} + +.segmented label:hover { + background-color: var(--chip-bg); +} + +.segmented img { + width: 16px; + height: 16px; +} + +.segmented input[type="radio"]:checked + label { + background-color: var(--chip-bg); + color: var(--text); +} + +.segmented input[type="radio"]:focus-visible + label, +.segmented input[type="radio"]:focus + label { + outline: 2px solid #007bff; + outline-offset: 2px; + border-radius: 999px; +} + +/* Transcript area */ +#linesTranscript { + margin: 20px auto; + max-width: 700px; + text-align: left; + font-size: 16px; +} + +#linesTranscript p { + margin: 0px 0; +} + +#linesTranscript strong { + color: var(--text); +} + +#speaker { + border: 1px solid var(--border); + border-radius: 100px; + padding: 2px 10px; + font-size: 14px; + margin-bottom: 0px; +} + +.label_diarization { + background-color: var(--chip-bg); + border-radius: 8px 8px 8px 8px; + padding: 2px 10px; + margin-left: 10px; + display: inline-block; + white-space: nowrap; + font-size: 14px; + margin-bottom: 0px; + color: var(--label-dia-text); +} + +.label_transcription { + background-color: var(--chip-bg); + border-radius: 8px 8px 8px 8px; + padding: 2px 10px; + display: inline-block; + white-space: nowrap; + margin-left: 10px; + font-size: 14px; + margin-bottom: 0px; + color: var(--label-trans-text); +} + +#timeInfo { + color: var(--muted); + margin-left: 10px; +} + +.textcontent { + font-size: 16px; + padding-left: 10px; + margin-bottom: 10px; + margin-top: 1px; + padding-top: 5px; + border-radius: 0px 0px 0px 10px; +} + +.buffer_diarization { + color: var(--label-dia-text); + margin-left: 4px; +} + +.buffer_transcription { + color: #7474748c; + margin-left: 4px; +} + +.spinner { + display: inline-block; + width: 8px; + height: 8px; + border: 2px solid var(--spinner-border); + border-top: 2px solid var(--spinner-top); + border-radius: 50%; + animation: spin 0.7s linear infinite; + vertical-align: middle; + margin-bottom: 2px; + margin-right: 5px; +} + +@keyframes spin { + to { + transform: rotate(360deg); + } +} + +.silence { + color: var(--muted); + background-color: var(--silence-bg); + font-size: 13px; + border-radius: 30px; + padding: 2px 10px; +} + +.loading { + color: var(--muted); + background-color: var(--loading-bg); + border-radius: 8px 8px 8px 0px; + padding: 2px 10px; + font-size: 14px; + margin-bottom: 0px; +} diff --git a/whisperlivekit/web/live_transcription.html b/whisperlivekit/web/live_transcription.html index b851e8e..a95e0bf 100644 --- a/whisperlivekit/web/live_transcription.html +++ b/whisperlivekit/web/live_transcription.html @@ -1,861 +1,60 @@ - - - - WhisperLiveKit - + + + WhisperLiveKit + - - -
- -
-
- - -
-
- - -
-
- - -
+
+ + +
+
+ + +
+ +
+ +
+ + + + + + + + +
+
+
-

+

- -
+
- + - diff --git a/whisperlivekit/web/live_transcription.js b/whisperlivekit/web/live_transcription.js new file mode 100644 index 0000000..8661219 --- /dev/null +++ b/whisperlivekit/web/live_transcription.js @@ -0,0 +1,515 @@ +/* Theme, WebSocket, recording, rendering logic extracted from inline script and adapted for segmented theme control and WS caption */ + +let isRecording = false; +let websocket = null; +let recorder = null; +let chunkDuration = 1000; +let websocketUrl = "ws://localhost:8000/asr"; +let userClosing = false; +let wakeLock = null; +let startTime = null; +let timerInterval = null; +let audioContext = null; +let analyser = null; +let microphone = null; +let waveCanvas = document.getElementById("waveCanvas"); +let waveCtx = waveCanvas.getContext("2d"); +let animationFrame = null; +let waitingForStop = false; +let lastReceivedData = null; +let lastSignature = null; + +waveCanvas.width = 60 * (window.devicePixelRatio || 1); +waveCanvas.height = 30 * (window.devicePixelRatio || 1); +waveCtx.scale(window.devicePixelRatio || 1, window.devicePixelRatio || 1); + +const statusText = document.getElementById("status"); +const recordButton = document.getElementById("recordButton"); +const chunkSelector = document.getElementById("chunkSelector"); +const websocketInput = document.getElementById("websocketInput"); +const websocketDefaultSpan = document.getElementById("wsDefaultUrl"); +const linesTranscriptDiv = document.getElementById("linesTranscript"); +const timerElement = document.querySelector(".timer"); +const themeRadios = document.querySelectorAll('input[name="theme"]'); + +function getWaveStroke() { + const styles = getComputedStyle(document.documentElement); + const v = styles.getPropertyValue("--wave-stroke").trim(); + return v || "#000"; +} + +let waveStroke = getWaveStroke(); +function updateWaveStroke() { + waveStroke = getWaveStroke(); +} + +function applyTheme(pref) { + if (pref === "light") { + document.documentElement.setAttribute("data-theme", "light"); + } else if (pref === "dark") { + document.documentElement.setAttribute("data-theme", "dark"); + } else { + document.documentElement.removeAttribute("data-theme"); + } + updateWaveStroke(); +} + +// Persisted theme preference +const savedThemePref = localStorage.getItem("themePreference") || "system"; +applyTheme(savedThemePref); +if (themeRadios.length) { + themeRadios.forEach((r) => { + r.checked = r.value === savedThemePref; + r.addEventListener("change", () => { + if (r.checked) { + localStorage.setItem("themePreference", r.value); + applyTheme(r.value); + } + }); + }); +} + +// React to OS theme changes when in "system" mode +const darkMq = window.matchMedia && window.matchMedia("(prefers-color-scheme: dark)"); +const handleOsThemeChange = () => { + const pref = localStorage.getItem("themePreference") || "system"; + if (pref === "system") updateWaveStroke(); +}; +if (darkMq && darkMq.addEventListener) { + darkMq.addEventListener("change", handleOsThemeChange); +} else if (darkMq && darkMq.addListener) { + // deprecated, but included for Safari compatibility + darkMq.addListener(handleOsThemeChange); +} + +// Helpers +function fmt1(x) { + const n = Number(x); + return Number.isFinite(n) ? n.toFixed(1) : x; +} + +// Default WebSocket URL computation +const host = window.location.hostname || "localhost"; +const port = window.location.port; +const protocol = window.location.protocol === "https:" ? "wss" : "ws"; +const defaultWebSocketUrl = `${protocol}://${host}${port ? ":" + port : ""}/asr`; + +// Populate default caption and input +if (websocketDefaultSpan) websocketDefaultSpan.textContent = defaultWebSocketUrl; +websocketInput.value = defaultWebSocketUrl; +websocketUrl = defaultWebSocketUrl; + +// Optional chunk selector (guard for presence) +if (chunkSelector) { + chunkSelector.addEventListener("change", () => { + chunkDuration = parseInt(chunkSelector.value); + }); +} + +// WebSocket input change handling +websocketInput.addEventListener("change", () => { + const urlValue = websocketInput.value.trim(); + if (!urlValue.startsWith("ws://") && !urlValue.startsWith("wss://")) { + statusText.textContent = "Invalid WebSocket URL (must start with ws:// or wss://)"; + return; + } + websocketUrl = urlValue; + statusText.textContent = "WebSocket URL updated. Ready to connect."; +}); + +function setupWebSocket() { + return new Promise((resolve, reject) => { + try { + websocket = new WebSocket(websocketUrl); + } catch (error) { + statusText.textContent = "Invalid WebSocket URL. Please check and try again."; + reject(error); + return; + } + + websocket.onopen = () => { + statusText.textContent = "Connected to server."; + resolve(); + }; + + websocket.onclose = () => { + if (userClosing) { + if (waitingForStop) { + statusText.textContent = "Processing finalized or connection closed."; + if (lastReceivedData) { + renderLinesWithBuffer( + lastReceivedData.lines || [], + lastReceivedData.buffer_diarization || "", + lastReceivedData.buffer_transcription || "", + 0, + 0, + true + ); + } + } + } else { + statusText.textContent = "Disconnected from the WebSocket server. (Check logs if model is loading.)"; + if (isRecording) { + stopRecording(); + } + } + isRecording = false; + waitingForStop = false; + userClosing = false; + lastReceivedData = null; + websocket = null; + updateUI(); + }; + + websocket.onerror = () => { + statusText.textContent = "Error connecting to WebSocket."; + reject(new Error("Error connecting to WebSocket")); + }; + + websocket.onmessage = (event) => { + const data = JSON.parse(event.data); + + if (data.type === "ready_to_stop") { + console.log("Ready to stop received, finalizing display and closing WebSocket."); + waitingForStop = false; + + if (lastReceivedData) { + renderLinesWithBuffer( + lastReceivedData.lines || [], + lastReceivedData.buffer_diarization || "", + lastReceivedData.buffer_transcription || "", + 0, + 0, + true + ); + } + statusText.textContent = "Finished processing audio! Ready to record again."; + recordButton.disabled = false; + + if (websocket) { + websocket.close(); + } + return; + } + + lastReceivedData = data; + + const { + lines = [], + buffer_transcription = "", + buffer_diarization = "", + remaining_time_transcription = 0, + remaining_time_diarization = 0, + status = "active_transcription", + } = data; + + renderLinesWithBuffer( + lines, + buffer_diarization, + buffer_transcription, + remaining_time_diarization, + remaining_time_transcription, + false, + status + ); + }; + }); +} + +function renderLinesWithBuffer( + lines, + buffer_diarization, + buffer_transcription, + remaining_time_diarization, + remaining_time_transcription, + isFinalizing = false, + current_status = "active_transcription" +) { + if (current_status === "no_audio_detected") { + linesTranscriptDiv.innerHTML = + "

No audio detected...

"; + return; + } + + const showLoading = !isFinalizing && (lines || []).some((it) => it.speaker == 0); + const showTransLag = !isFinalizing && remaining_time_transcription > 0; + const showDiaLag = !isFinalizing && !!buffer_diarization && remaining_time_diarization > 0; + const signature = JSON.stringify({ + lines: (lines || []).map((it) => ({ speaker: it.speaker, text: it.text, beg: it.beg, end: it.end })), + buffer_transcription: buffer_transcription || "", + buffer_diarization: buffer_diarization || "", + status: current_status, + showLoading, + showTransLag, + showDiaLag, + isFinalizing: !!isFinalizing, + }); + if (lastSignature === signature) { + const t = document.querySelector(".lag-transcription-value"); + if (t) t.textContent = fmt1(remaining_time_transcription); + const d = document.querySelector(".lag-diarization-value"); + if (d) d.textContent = fmt1(remaining_time_diarization); + const ld = document.querySelector(".loading-diarization-value"); + if (ld) ld.textContent = fmt1(remaining_time_diarization); + return; + } + lastSignature = signature; + + const linesHtml = (lines || []) + .map((item, idx) => { + let timeInfo = ""; + if (item.beg !== undefined && item.end !== undefined) { + timeInfo = ` ${item.beg} - ${item.end}`; + } + + let speakerLabel = ""; + if (item.speaker === -2) { + speakerLabel = `Silence${timeInfo}`; + } else if (item.speaker == 0 && !isFinalizing) { + speakerLabel = `${fmt1( + remaining_time_diarization + )} second(s) of audio are undergoing diarization`; + } else if (item.speaker == -1) { + speakerLabel = `Speaker 1${timeInfo}`; + } else if (item.speaker !== -1 && item.speaker !== 0) { + speakerLabel = `Speaker ${item.speaker}${timeInfo}`; + } + + let currentLineText = item.text || ""; + + if (idx === lines.length - 1) { + if (!isFinalizing && item.speaker !== -2) { + if (remaining_time_transcription > 0) { + speakerLabel += `Transcription lag ${fmt1( + remaining_time_transcription + )}s`; + } + if (buffer_diarization && remaining_time_diarization > 0) { + speakerLabel += `Diarization lag${fmt1( + remaining_time_diarization + )}s`; + } + } + + if (buffer_diarization) { + if (isFinalizing) { + currentLineText += + (currentLineText.length > 0 && buffer_diarization.trim().length > 0 ? " " : "") + buffer_diarization.trim(); + } else { + currentLineText += `${buffer_diarization}`; + } + } + if (buffer_transcription) { + if (isFinalizing) { + currentLineText += + (currentLineText.length > 0 && buffer_transcription.trim().length > 0 ? " " : "") + + buffer_transcription.trim(); + } else { + currentLineText += `${buffer_transcription}`; + } + } + } + + return currentLineText.trim().length > 0 || speakerLabel.length > 0 + ? `

${speakerLabel}

${currentLineText}

` + : `

${speakerLabel}

`; + }) + .join(""); + + linesTranscriptDiv.innerHTML = linesHtml; + window.scrollTo({ top: document.body.scrollHeight, behavior: "smooth" }); +} + +function updateTimer() { + if (!startTime) return; + + const elapsed = Math.floor((Date.now() - startTime) / 1000); + const minutes = Math.floor(elapsed / 60).toString().padStart(2, "0"); + const seconds = (elapsed % 60).toString().padStart(2, "0"); + timerElement.textContent = `${minutes}:${seconds}`; +} + +function drawWaveform() { + if (!analyser) return; + + const bufferLength = analyser.frequencyBinCount; + const dataArray = new Uint8Array(bufferLength); + analyser.getByteTimeDomainData(dataArray); + + waveCtx.clearRect( + 0, + 0, + waveCanvas.width / (window.devicePixelRatio || 1), + waveCanvas.height / (window.devicePixelRatio || 1) + ); + waveCtx.lineWidth = 1; + waveCtx.strokeStyle = waveStroke; + waveCtx.beginPath(); + + const sliceWidth = (waveCanvas.width / (window.devicePixelRatio || 1)) / bufferLength; + let x = 0; + + for (let i = 0; i < bufferLength; i++) { + const v = dataArray[i] / 128.0; + const y = (v * (waveCanvas.height / (window.devicePixelRatio || 1))) / 2; + + if (i === 0) { + waveCtx.moveTo(x, y); + } else { + waveCtx.lineTo(x, y); + } + + x += sliceWidth; + } + + waveCtx.lineTo( + waveCanvas.width / (window.devicePixelRatio || 1), + (waveCanvas.height / (window.devicePixelRatio || 1)) / 2 + ); + waveCtx.stroke(); + + animationFrame = requestAnimationFrame(drawWaveform); +} + +async function startRecording() { + try { + try { + wakeLock = await navigator.wakeLock.request("screen"); + } catch (err) { + console.log("Error acquiring wake lock."); + } + + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + + audioContext = new (window.AudioContext || window.webkitAudioContext)(); + analyser = audioContext.createAnalyser(); + analyser.fftSize = 256; + microphone = audioContext.createMediaStreamSource(stream); + microphone.connect(analyser); + + recorder = new MediaRecorder(stream, { mimeType: "audio/webm" }); + recorder.ondataavailable = (e) => { + if (websocket && websocket.readyState === WebSocket.OPEN) { + websocket.send(e.data); + } + }; + recorder.start(chunkDuration); + + startTime = Date.now(); + timerInterval = setInterval(updateTimer, 1000); + drawWaveform(); + + isRecording = true; + updateUI(); + } catch (err) { + statusText.textContent = "Error accessing microphone. Please allow microphone access."; + console.error(err); + } +} + +async function stopRecording() { + if (wakeLock) { + try { + await wakeLock.release(); + } catch (e) { + // ignore + } + wakeLock = null; + } + + userClosing = true; + waitingForStop = true; + + if (websocket && websocket.readyState === WebSocket.OPEN) { + const emptyBlob = new Blob([], { type: "audio/webm" }); + websocket.send(emptyBlob); + statusText.textContent = "Recording stopped. Processing final audio..."; + } + + if (recorder) { + recorder.stop(); + recorder = null; + } + + if (microphone) { + microphone.disconnect(); + microphone = null; + } + + if (analyser) { + analyser = null; + } + + if (audioContext && audioContext.state !== "closed") { + try { + await audioContext.close(); + } catch (e) { + console.warn("Could not close audio context:", e); + } + audioContext = null; + } + + if (animationFrame) { + cancelAnimationFrame(animationFrame); + animationFrame = null; + } + + if (timerInterval) { + clearInterval(timerInterval); + timerInterval = null; + } + timerElement.textContent = "00:00"; + startTime = null; + + isRecording = false; + updateUI(); +} + +async function toggleRecording() { + if (!isRecording) { + if (waitingForStop) { + console.log("Waiting for stop, early return"); + return; + } + console.log("Connecting to WebSocket"); + try { + if (websocket && websocket.readyState === WebSocket.OPEN) { + await startRecording(); + } else { + await setupWebSocket(); + await startRecording(); + } + } catch (err) { + statusText.textContent = "Could not connect to WebSocket or access mic. Aborted."; + console.error(err); + } + } else { + console.log("Stopping recording"); + stopRecording(); + } +} + +function updateUI() { + recordButton.classList.toggle("recording", isRecording); + recordButton.disabled = waitingForStop; + + if (waitingForStop) { + if (statusText.textContent !== "Recording stopped. Processing final audio...") { + statusText.textContent = "Please wait for processing to complete..."; + } + } else if (isRecording) { + statusText.textContent = "Recording..."; + } else { + if ( + statusText.textContent !== "Finished processing audio! Ready to record again." && + statusText.textContent !== "Processing finalized or connection closed." + ) { + statusText.textContent = "Click to start transcription"; + } + } + if (!waitingForStop) { + recordButton.disabled = false; + } +} + +recordButton.addEventListener("click", toggleRecording); diff --git a/whisperlivekit/web/src/dark_mode.svg b/whisperlivekit/web/src/dark_mode.svg new file mode 100644 index 0000000..a083e1a --- /dev/null +++ b/whisperlivekit/web/src/dark_mode.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/whisperlivekit/web/src/light_mode.svg b/whisperlivekit/web/src/light_mode.svg new file mode 100644 index 0000000..66b6e74 --- /dev/null +++ b/whisperlivekit/web/src/light_mode.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/whisperlivekit/web/src/system_mode.svg b/whisperlivekit/web/src/system_mode.svg new file mode 100644 index 0000000..d81dbf3 --- /dev/null +++ b/whisperlivekit/web/src/system_mode.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/whisperlivekit/web/web_interface.py b/whisperlivekit/web/web_interface.py index 340d744..0b0886e 100644 --- a/whisperlivekit/web/web_interface.py +++ b/whisperlivekit/web/web_interface.py @@ -10,4 +10,24 @@ def get_web_interface_html(): return f.read() except Exception as e: logger.error(f"Error loading web interface HTML: {e}") - return "

Error loading interface

" \ No newline at end of file + return "

Error loading interface

" + + +if __name__ == '__main__': + + from fastapi import FastAPI + from fastapi.responses import HTMLResponse + import uvicorn + from starlette.staticfiles import StaticFiles + import pathlib + import whisperlivekit.web as webpkg + + app = FastAPI() + web_dir = pathlib.Path(webpkg.__file__).parent + app.mount("/web", StaticFiles(directory=str(web_dir)), name="web") + + @app.get("/") + async def get(): + return HTMLResponse(get_web_interface_html()) + + uvicorn.run(app=app) \ No newline at end of file