diff --git a/chrome-extension/README.md b/chrome-extension/README.md
new file mode 100644
index 0000000..ab353c3
--- /dev/null
+++ b/chrome-extension/README.md
@@ -0,0 +1,13 @@
+## WhisperLiveKit Chrome Extension v0.1.0
+Capture the audio of your current tab, transcribe or translate it using WhisperLiveKit. **Still unstable**
+
+
+
+## Running this extension
+1. Clone this repository.
+2. Load this directory in Chrome as an unpacked extension.
+
+
+## Devs:
+- Impossible to capture audio from tabs if the extension is a panel, unfortunately: https://issues.chromium.org/issues/40926394
+- To capture microphone in an extension, there are tricks: https://github.com/justinmann/sidepanel-audio-issue , https://medium.com/@lynchee.owo/how-to-enable-microphone-access-in-chrome-extensions-by-code-924295170080 (comments)
diff --git a/chrome-extension/demo-extension.png b/chrome-extension/demo-extension.png
new file mode 100644
index 0000000..ef6e7e2
Binary files /dev/null and b/chrome-extension/demo-extension.png differ
diff --git a/chrome-extension/example_tab_capture.js b/chrome-extension/example_tab_capture.js
new file mode 100644
index 0000000..8095371
--- /dev/null
+++ b/chrome-extension/example_tab_capture.js
@@ -0,0 +1,315 @@
+const extend = function() { //helper function to merge objects
+ let target = arguments[0],
+ sources = [].slice.call(arguments, 1);
+ for (let i = 0; i < sources.length; ++i) {
+ let src = sources[i];
+ for (key in src) {
+ let val = src[key];
+ target[key] = typeof val === "object"
+ ? extend(typeof target[key] === "object" ? target[key] : {}, val)
+ : val;
+ }
+ }
+ return target;
+};
+
+const WORKER_FILE = {
+ wav: "WavWorker.js",
+ mp3: "Mp3Worker.js"
+};
+
+// default configs
+const CONFIGS = {
+ workerDir: "/workers/", // worker scripts dir (end with /)
+ numChannels: 2, // number of channels
+ encoding: "wav", // encoding (can be changed at runtime)
+
+ // runtime options
+ options: {
+ timeLimit: 1200, // recording time limit (sec)
+ encodeAfterRecord: true, // process encoding after recording
+ progressInterval: 1000, // encoding progress report interval (millisec)
+ bufferSize: undefined, // buffer size (use browser default)
+
+ // encoding-specific options
+ wav: {
+ mimeType: "audio/wav"
+ },
+ mp3: {
+ mimeType: "audio/mpeg",
+ bitRate: 192 // (CBR only): bit rate = [64 .. 320]
+ }
+ }
+};
+
+class Recorder {
+
+ constructor(source, configs) { //creates audio context from the source and connects it to the worker
+ extend(this, CONFIGS, configs || {});
+ this.context = source.context;
+ if (this.context.createScriptProcessor == null)
+ this.context.createScriptProcessor = this.context.createJavaScriptNode;
+ this.input = this.context.createGain();
+ source.connect(this.input);
+ this.buffer = [];
+ this.initWorker();
+ }
+
+ isRecording() {
+ return this.processor != null;
+ }
+
+ setEncoding(encoding) {
+ if(!this.isRecording() && this.encoding !== encoding) {
+ this.encoding = encoding;
+ this.initWorker();
+ }
+ }
+
+ setOptions(options) {
+ if (!this.isRecording()) {
+ extend(this.options, options);
+ this.worker.postMessage({ command: "options", options: this.options});
+ }
+ }
+
+ startRecording() {
+ if(!this.isRecording()) {
+ let numChannels = this.numChannels;
+ let buffer = this.buffer;
+ let worker = this.worker;
+ this.processor = this.context.createScriptProcessor(
+ this.options.bufferSize,
+ this.numChannels, this.numChannels);
+ this.input.connect(this.processor);
+ this.processor.connect(this.context.destination);
+ this.processor.onaudioprocess = function(event) {
+ for (var ch = 0; ch < numChannels; ++ch)
+ buffer[ch] = event.inputBuffer.getChannelData(ch);
+ worker.postMessage({ command: "record", buffer: buffer });
+ };
+ this.worker.postMessage({
+ command: "start",
+ bufferSize: this.processor.bufferSize
+ });
+ this.startTime = Date.now();
+ }
+ }
+
+ cancelRecording() {
+ if(this.isRecording()) {
+ this.input.disconnect();
+ this.processor.disconnect();
+ delete this.processor;
+ this.worker.postMessage({ command: "cancel" });
+ }
+ }
+
+ finishRecording() {
+ if (this.isRecording()) {
+ this.input.disconnect();
+ this.processor.disconnect();
+ delete this.processor;
+ this.worker.postMessage({ command: "finish" });
+ }
+ }
+
+ cancelEncoding() {
+ if (this.options.encodeAfterRecord)
+ if (!this.isRecording()) {
+ this.onEncodingCanceled(this);
+ this.initWorker();
+ }
+ }
+
+ initWorker() {
+ if (this.worker != null)
+ this.worker.terminate();
+ this.onEncoderLoading(this, this.encoding);
+ this.worker = new Worker(this.workerDir + WORKER_FILE[this.encoding]);
+ let _this = this;
+ this.worker.onmessage = function(event) {
+ let data = event.data;
+ switch (data.command) {
+ case "loaded":
+ _this.onEncoderLoaded(_this, _this.encoding);
+ break;
+ case "timeout":
+ _this.onTimeout(_this);
+ break;
+ case "progress":
+ _this.onEncodingProgress(_this, data.progress);
+ break;
+ case "complete":
+ _this.onComplete(_this, data.blob);
+ }
+ }
+ this.worker.postMessage({
+ command: "init",
+ config: {
+ sampleRate: this.context.sampleRate,
+ numChannels: this.numChannels
+ },
+ options: this.options
+ });
+ }
+
+ onEncoderLoading(recorder, encoding) {}
+ onEncoderLoaded(recorder, encoding) {}
+ onTimeout(recorder) {}
+ onEncodingProgress(recorder, progress) {}
+ onEncodingCanceled(recorder) {}
+ onComplete(recorder, blob) {}
+
+}
+
+const audioCapture = (timeLimit, muteTab, format, quality, limitRemoved) => {
+ chrome.tabCapture.capture({audio: true}, (stream) => { // sets up stream for capture
+ let startTabId; //tab when the capture is started
+ let timeout;
+ let completeTabID; //tab when the capture is stopped
+ let audioURL = null; //resulting object when encoding is completed
+ chrome.tabs.query({active:true, currentWindow: true}, (tabs) => startTabId = tabs[0].id) //saves start tab
+ const liveStream = stream;
+ const audioCtx = new AudioContext();
+ const source = audioCtx.createMediaStreamSource(stream);
+ let mediaRecorder = new Recorder(source); //initiates the recorder based on the current stream
+ mediaRecorder.setEncoding(format); //sets encoding based on options
+ if(limitRemoved) { //removes time limit
+ mediaRecorder.setOptions({timeLimit: 10800});
+ } else {
+ mediaRecorder.setOptions({timeLimit: timeLimit/1000});
+ }
+ if(format === "mp3") {
+ mediaRecorder.setOptions({mp3: {bitRate: quality}});
+ }
+ mediaRecorder.startRecording();
+
+ function onStopCommand(command) { //keypress
+ if (command === "stop") {
+ stopCapture();
+ }
+ }
+ function onStopClick(request) { //click on popup
+ if(request === "stopCapture") {
+ stopCapture();
+ } else if (request === "cancelCapture") {
+ cancelCapture();
+ } else if (request.cancelEncodeID) {
+ if(request.cancelEncodeID === startTabId && mediaRecorder) {
+ mediaRecorder.cancelEncoding();
+ }
+ }
+ }
+ chrome.commands.onCommand.addListener(onStopCommand);
+ chrome.runtime.onMessage.addListener(onStopClick);
+ mediaRecorder.onComplete = (recorder, blob) => {
+ audioURL = window.URL.createObjectURL(blob);
+ if(completeTabID) {
+ chrome.tabs.sendMessage(completeTabID, {type: "encodingComplete", audioURL});
+ }
+ mediaRecorder = null;
+ }
+ mediaRecorder.onEncodingProgress = (recorder, progress) => {
+ if(completeTabID) {
+ chrome.tabs.sendMessage(completeTabID, {type: "encodingProgress", progress: progress});
+ }
+ }
+
+ const stopCapture = function() {
+ let endTabId;
+ //check to make sure the current tab is the tab being captured
+ chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
+ endTabId = tabs[0].id;
+ if(mediaRecorder && startTabId === endTabId){
+ mediaRecorder.finishRecording();
+ chrome.tabs.create({url: "complete.html"}, (tab) => {
+ completeTabID = tab.id;
+ let completeCallback = () => {
+ chrome.tabs.sendMessage(tab.id, {type: "createTab", format: format, audioURL, startID: startTabId});
+ }
+ setTimeout(completeCallback, 500);
+ });
+ closeStream(endTabId);
+ }
+ })
+ }
+
+ const cancelCapture = function() {
+ let endTabId;
+ chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
+ endTabId = tabs[0].id;
+ if(mediaRecorder && startTabId === endTabId){
+ mediaRecorder.cancelRecording();
+ closeStream(endTabId);
+ }
+ })
+ }
+
+//removes the audio context and closes recorder to save memory
+ const closeStream = function(endTabId) {
+ chrome.commands.onCommand.removeListener(onStopCommand);
+ chrome.runtime.onMessage.removeListener(onStopClick);
+ mediaRecorder.onTimeout = () => {};
+ audioCtx.close();
+ liveStream.getAudioTracks()[0].stop();
+ sessionStorage.removeItem(endTabId);
+ chrome.runtime.sendMessage({captureStopped: endTabId});
+ }
+
+ mediaRecorder.onTimeout = stopCapture;
+
+ if(!muteTab) {
+ let audio = new Audio();
+ audio.srcObject = liveStream;
+ audio.play();
+ }
+ });
+}
+
+
+
+// Sends responses to and from the popup menu
+chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
+ if (request.currentTab && sessionStorage.getItem(request.currentTab)) {
+ sendResponse(sessionStorage.getItem(request.currentTab));
+ } else if (request.currentTab){
+ sendResponse(false);
+ } else if (request === "startCapture") {
+ startCapture();
+ }
+});
+
+const startCapture = function() {
+ chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
+ // CODE TO BLOCK CAPTURE ON YOUTUBE, DO NOT REMOVE
+ // if(tabs[0].url.toLowerCase().includes("youtube")) {
+ // chrome.tabs.create({url: "error.html"});
+ // } else {
+ if(!sessionStorage.getItem(tabs[0].id)) {
+ sessionStorage.setItem(tabs[0].id, Date.now());
+ chrome.storage.sync.get({
+ maxTime: 1200000,
+ muteTab: false,
+ format: "mp3",
+ quality: 192,
+ limitRemoved: false
+ }, (options) => {
+ let time = options.maxTime;
+ if(time > 1200000) {
+ time = 1200000
+ }
+ audioCapture(time, options.muteTab, options.format, options.quality, options.limitRemoved);
+ });
+ chrome.runtime.sendMessage({captureStarted: tabs[0].id, startTime: Date.now()});
+ }
+ // }
+ });
+};
+
+
+chrome.commands.onCommand.addListener((command) => {
+ if (command === "start") {
+ startCapture();
+ }
+});
\ No newline at end of file
diff --git a/chrome-extension/manifest.json b/chrome-extension/manifest.json
new file mode 100644
index 0000000..a925ee5
--- /dev/null
+++ b/chrome-extension/manifest.json
@@ -0,0 +1,20 @@
+{
+  "manifest_version": 3,
+  "name": "WhisperLiveKit Tab Capture",
+  "version": "1.0",
+  "description": "Capture and transcribe audio from browser tabs using WhisperLiveKit.",
+  "background": {
+    "service_worker": "service-worker.js"
+  },
+  "action": {
+    "default_title": "WhisperLiveKit Tab Capture",
+    "default_popup": "popup.html"
+  },
+  "permissions": ["scripting", "tabCapture", "offscreen", "activeTab", "storage"],
+  "web_accessible_resources": [
+    {
+      "resources": ["requestPermissions.html", "requestPermissions.js"],
+      "matches": ["<all_urls>"]
+    }
+  ]
+}
diff --git a/chrome-extension/popup.html b/chrome-extension/popup.html
new file mode 100644
index 0000000..1677c5d
--- /dev/null
+++ b/chrome-extension/popup.html
@@ -0,0 +1,73 @@
+
+
+
+
+
+
+ WhisperLiveKit
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/chrome-extension/requestPermissions.html b/chrome-extension/requestPermissions.html
new file mode 100644
index 0000000..86d36dc
--- /dev/null
+++ b/chrome-extension/requestPermissions.html
@@ -0,0 +1,12 @@
+
+
+
+ Request Permissions
+
+
+
+  This page exists to work around an issue with Chrome that blocks permission
+ requests from chrome extensions
+
+
+
diff --git a/chrome-extension/requestPermissions.js b/chrome-extension/requestPermissions.js
new file mode 100644
index 0000000..0f1b750
--- /dev/null
+++ b/chrome-extension/requestPermissions.js
@@ -0,0 +1,17 @@
+/**
+ * Requests user permission for microphone access.
+ * @returns {Promise} A Promise that resolves when permission is granted or rejects with an error.
+ */
+async function getUserPermission() {
+ console.log("Getting user permission for microphone access...");
+ await navigator.mediaDevices.getUserMedia({ audio: true });
+ const micPermission = await navigator.permissions.query({
+ name: "microphone",
+ });
+ if (micPermission.state == "granted") {
+ window.close();
+ }
+}
+
+// Call the function to request microphone permission
+getUserPermission();
diff --git a/chrome-extension/service-worker.js b/chrome-extension/service-worker.js
new file mode 100644
index 0000000..8d69af7
--- /dev/null
+++ b/chrome-extension/service-worker.js
@@ -0,0 +1,249 @@
+console.log("Service worker loaded");
+
+let isRecording = false;
+let currentStreamId = null;
+
+chrome.runtime.onInstalled.addListener((details) => {
+ console.log("Extension installed/updated");
+});
+
+chrome.action.onClicked.addListener((tab) => {
+ // Get the current tab ID
+ const tabId = tab.id;
+
+ // Inject the content script into the current tab
+ chrome.scripting.executeScript({
+ target: { tabId: tabId },
+ files: ['style_popup.js']
+ });
+
+ console.log(`Content script injected into tab ${tabId}`);
+});
+
+
+// Handle messages from popup
+chrome.runtime.onMessage.addListener(async (message, sender, sendResponse) => {
+ console.log("Service worker received message:", message);
+
+ try {
+ switch (message.type) {
+ case 'start-capture':
+ const startResult = await startTabCapture(message.tabId, message.websocketUrl);
+ sendResponse(startResult);
+ break;
+
+ case 'stop-capture':
+ const stopResult = await stopTabCapture();
+ sendResponse(stopResult);
+ break;
+
+ case 'get-recording-state':
+ sendResponse({ isRecording: isRecording });
+ break;
+
+ default:
+ sendResponse({ success: false, error: 'Unknown message type' });
+ }
+ } catch (error) {
+ console.error('Error handling message:', error);
+ sendResponse({ success: false, error: error.message });
+ }
+
+ return true; // Keep message channel open for async response
+});
+
+async function startTabCapture(tabId, websocketUrl) {
+ console.log('Service worker: Starting tab capture process...');
+ console.log('Service worker: tabId:', tabId, 'websocketUrl:', websocketUrl);
+
+ try {
+ if (isRecording) {
+ console.log('Service worker: Already recording, aborting');
+ return { success: false, error: 'Already recording' };
+ }
+
+ // Check if offscreen document exists
+ console.log('Service worker: Checking for existing offscreen document...');
+ const existingContexts = await chrome.runtime.getContexts({});
+ console.log('Service worker: Found contexts:', existingContexts.length);
+
+ const offscreenDocument = existingContexts.find(
+ (c) => c.contextType === 'OFFSCREEN_DOCUMENT'
+ );
+
+ console.log('Service worker: Offscreen document exists:', !!offscreenDocument);
+
+ // Create offscreen document if it doesn't exist
+ if (!offscreenDocument) {
+ console.log('Service worker: Creating offscreen document...');
+ try {
+ await chrome.offscreen.createDocument({
+ url: 'offscreen.html',
+ reasons: ['USER_MEDIA'],
+ justification: 'Capturing and processing tab audio for transcription'
+ });
+ console.log('Service worker: Offscreen document created successfully');
+
+ // Wait for offscreen document to initialize
+ console.log('Service worker: Waiting for offscreen document to initialize...');
+ await new Promise(resolve => setTimeout(resolve, 500));
+ console.log('Service worker: Offscreen document initialization delay complete');
+
+ } catch (offscreenError) {
+ console.error('Service worker: Failed to create offscreen document:', offscreenError);
+ return { success: false, error: 'Failed to create offscreen document: ' + offscreenError.message };
+ }
+ }
+
+ // Get media stream ID for the tab
+ console.log('Service worker: Getting media stream ID for tab:', tabId);
+ try {
+ currentStreamId = await chrome.tabCapture.getMediaStreamId({
+ targetTabId: tabId
+ });
+ console.log('Service worker: Media stream ID:', currentStreamId);
+ } catch (tabCaptureError) {
+ console.error('Service worker: Failed to get media stream ID:', tabCaptureError);
+ return { success: false, error: 'Failed to get media stream ID: ' + tabCaptureError.message };
+ }
+
+ if (!currentStreamId) {
+ console.log('Service worker: No media stream ID returned');
+ return { success: false, error: 'Failed to get media stream ID - no stream returned' };
+ }
+
+ // Send message to offscreen document to start capture with retry logic
+ console.log('Service worker: Sending start message to offscreen document...');
+
+ let response;
+ let retryCount = 0;
+ const maxRetries = 5;
+
+ while (!response && retryCount < maxRetries) {
+ try {
+ console.log(`Service worker: Attempt ${retryCount + 1}/${maxRetries} to communicate with offscreen document`);
+
+ // Send message to offscreen document without target property
+ response = await chrome.runtime.sendMessage({
+ type: 'start-recording',
+ target: 'offscreen',
+ data: {
+ streamId: currentStreamId,
+ websocketUrl: websocketUrl
+ }
+ });
+
+ if (!response) {
+ console.warn(`Service worker: No response from offscreen document, waiting before retry...`);
+ await new Promise(resolve => setTimeout(resolve, 200));
+ retryCount++;
+ } else {
+ console.log(`Service worker: Successfully communicated with offscreen document on attempt ${retryCount + 1}`);
+ }
+ } catch (sendError) {
+ console.error(`Service worker: Error sending message to offscreen document (attempt ${retryCount + 1}):`, sendError);
+ response = { success: false, error: 'Failed to communicate with offscreen document: ' + sendError.message };
+ break;
+ }
+ }
+
+ console.log('Service worker: Final offscreen document response:', response);
+
+ if (response && response.success) {
+ isRecording = true;
+ console.log('Service worker: Recording started successfully');
+
+ // Notify popup of state change
+ try {
+ chrome.runtime.sendMessage({
+ type: 'recording-state',
+ isRecording: true
+ });
+ } catch (e) {
+ console.warn('Service worker: Could not notify popup of state change:', e);
+ }
+
+ return { success: true };
+ } else {
+ console.log('Service worker: Offscreen document returned failure');
+ return { success: false, error: response?.error || 'Failed to start recording in offscreen document' };
+ }
+
+ } catch (error) {
+ console.error('Service worker: Exception in startTabCapture:', error);
+ return { success: false, error: 'Exception: ' + error.message };
+ }
+}
+
+async function stopTabCapture() {
+ try {
+ if (!isRecording) {
+ return { success: false, error: 'Not currently recording' };
+ }
+
+ // Send message to offscreen document to stop capture
+ const response = await chrome.runtime.sendMessage({
+ type: 'stop-recording',
+ target: 'offscreen'
+ });
+
+ isRecording = false;
+ currentStreamId = null;
+
+ // Notify popup of state change
+ try {
+ chrome.runtime.sendMessage({
+ type: 'recording-state',
+ isRecording: false
+ });
+ } catch (e) {
+ // Popup might be closed, ignore error
+ }
+
+ return { success: true };
+
+ } catch (error) {
+ console.error('Error stopping tab capture:', error);
+ isRecording = false;
+ currentStreamId = null;
+ return { success: false, error: error.message };
+ }
+}
+
+// Handle messages from offscreen document
+chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
+ if (message.target === 'service-worker') {
+ switch (message.type) {
+ case 'recording-stopped':
+ isRecording = false;
+ currentStreamId = null;
+
+ // Notify popup
+ try {
+ chrome.runtime.sendMessage({
+ type: 'recording-state',
+ isRecording: false
+ });
+ } catch (e) {
+ // Popup might be closed, ignore error
+ }
+ break;
+
+ case 'recording-error':
+ isRecording = false;
+ currentStreamId = null;
+
+ // Notify popup
+ try {
+ chrome.runtime.sendMessage({
+ type: 'status-update',
+ status: 'error',
+ message: message.error || 'Recording error occurred'
+ });
+ } catch (e) {
+ // Popup might be closed, ignore error
+ }
+ break;
+ }
+ }
+});
diff --git a/chrome-extension/sidepanel.js b/chrome-extension/sidepanel.js
new file mode 100644
index 0000000..5bf3311
--- /dev/null
+++ b/chrome-extension/sidepanel.js
@@ -0,0 +1,29 @@
+console.log("sidepanel.js");
+
+async function run() {
+ const micPermission = await navigator.permissions.query({
+ name: "microphone",
+ });
+
+ document.getElementById(
+ "audioPermission"
+ ).innerText = `MICROPHONE: ${micPermission.state}`;
+
+ if (micPermission.state !== "granted") {
+ chrome.tabs.create({ url: "requestPermissions.html" });
+ }
+
+ const intervalId = setInterval(async () => {
+ const micPermission = await navigator.permissions.query({
+ name: "microphone",
+ });
+ if (micPermission.state === "granted") {
+ document.getElementById(
+ "audioPermission"
+ ).innerText = `MICROPHONE: ${micPermission.state}`;
+ clearInterval(intervalId);
+ }
+ }, 100);
+}
+
+void run();
diff --git a/chrome-extension/web/live_transcription.css b/chrome-extension/web/live_transcription.css
new file mode 100644
index 0000000..01392d6
--- /dev/null
+++ b/chrome-extension/web/live_transcription.css
@@ -0,0 +1,469 @@
+:root {
+ --bg: #ffffff;
+ --text: #111111;
+ --muted: #666666;
+ --border: #e5e5e5;
+ --chip-bg: rgba(0, 0, 0, 0.04);
+ --chip-text: #000000;
+ --spinner-border: #8d8d8d5c;
+ --spinner-top: #b0b0b0;
+ --silence-bg: #f3f3f3;
+ --loading-bg: rgba(255, 77, 77, 0.06);
+ --button-bg: #ffffff;
+ --button-border: #e9e9e9;
+ --wave-stroke: #000000;
+ --label-dia-text: #868686;
+ --label-trans-text: #111111;
+}
+
+@media (prefers-color-scheme: dark) {
+ :root:not([data-theme="light"]) {
+ --bg: #0b0b0b;
+ --text: #e6e6e6;
+ --muted: #9aa0a6;
+ --border: #333333;
+ --chip-bg: rgba(255, 255, 255, 0.08);
+ --chip-text: #e6e6e6;
+ --spinner-border: #555555;
+ --spinner-top: #dddddd;
+ --silence-bg: #1a1a1a;
+ --loading-bg: rgba(255, 77, 77, 0.12);
+ --button-bg: #111111;
+ --button-border: #333333;
+ --wave-stroke: #e6e6e6;
+ --label-dia-text: #b3b3b3;
+ --label-trans-text: #ffffff;
+ }
+}
+
+:root[data-theme="dark"] {
+ --bg: #0b0b0b;
+ --text: #e6e6e6;
+ --muted: #9aa0a6;
+ --border: #333333;
+ --chip-bg: rgba(255, 255, 255, 0.08);
+ --chip-text: #e6e6e6;
+ --spinner-border: #555555;
+ --spinner-top: #dddddd;
+ --silence-bg: #1a1a1a;
+ --loading-bg: rgba(255, 77, 77, 0.12);
+ --button-bg: #111111;
+ --button-border: #333333;
+ --wave-stroke: #e6e6e6;
+ --label-dia-text: #b3b3b3;
+ --label-trans-text: #ffffff;
+}
+
+:root[data-theme="light"] {
+ --bg: #ffffff;
+ --text: #111111;
+ --muted: #666666;
+ --border: #e5e5e5;
+ --chip-bg: rgba(0, 0, 0, 0.04);
+ --chip-text: #000000;
+ --spinner-border: #8d8d8d5c;
+ --spinner-top: #b0b0b0;
+ --silence-bg: #f3f3f3;
+ --loading-bg: rgba(255, 77, 77, 0.06);
+ --button-bg: #ffffff;
+ --button-border: #e9e9e9;
+ --wave-stroke: #000000;
+ --label-dia-text: #868686;
+ --label-trans-text: #111111;
+}
+
+body {
+ font-family: ui-sans-serif, system-ui, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol', 'Noto Color Emoji';
+ margin: 20px;
+ text-align: center;
+ background-color: var(--bg);
+ color: var(--text);
+}
+
+/* Record button */
+#recordButton {
+ width: 50px;
+ height: 50px;
+ border: none;
+ border-radius: 50%;
+ background-color: var(--button-bg);
+ cursor: pointer;
+ transition: all 0.3s ease;
+ border: 1px solid var(--button-border);
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ position: relative;
+}
+
+#recordButton.recording {
+ width: 180px;
+ border-radius: 40px;
+ justify-content: flex-start;
+ padding-left: 20px;
+}
+
+#recordButton:active {
+ transform: scale(0.95);
+}
+
+.shape-container {
+ width: 25px;
+ height: 25px;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ flex-shrink: 0;
+}
+
+.shape {
+ width: 25px;
+ height: 25px;
+ background-color: rgb(209, 61, 53);
+ border-radius: 50%;
+ transition: all 0.3s ease;
+}
+
+#recordButton:disabled .shape {
+ background-color: #6e6d6d;
+}
+
+#recordButton.recording .shape {
+ border-radius: 5px;
+ width: 25px;
+ height: 25px;
+}
+
+/* Recording elements */
+.recording-info {
+ display: none;
+ align-items: center;
+ margin-left: 15px;
+ flex-grow: 1;
+}
+
+#recordButton.recording .recording-info {
+ display: flex;
+}
+
+.wave-container {
+ width: 60px;
+ height: 30px;
+ position: relative;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+}
+
+#waveCanvas {
+ width: 100%;
+ height: 100%;
+}
+
+.timer {
+ font-size: 14px;
+ font-weight: 500;
+ color: var(--text);
+ margin-left: 10px;
+}
+
+#status {
+ margin-top: 20px;
+ font-size: 16px;
+ color: var(--text);
+}
+
+/* Settings */
+.settings-container {
+ display: flex;
+ justify-content: center;
+ align-items: center;
+ gap: 15px;
+ margin-top: 20px;
+}
+
+.settings {
+ display: flex;
+ flex-wrap: wrap;
+ align-items: flex-start;
+ gap: 12px;
+}
+
+.field {
+ display: flex;
+ flex-direction: column;
+ align-items: flex-start;
+ gap: 3px;
+}
+
+#chunkSelector,
+#websocketInput,
+#themeSelector,
+#microphoneSelect {
+ font-size: 16px;
+ padding: 5px 8px;
+ border-radius: 8px;
+ border: 1px solid var(--border);
+ background-color: var(--button-bg);
+ color: var(--text);
+ max-height: 30px;
+}
+
+#microphoneSelect {
+ width: 100%;
+ max-width: 190px;
+ min-width: 120px;
+}
+
+#chunkSelector:focus,
+#websocketInput:focus,
+#themeSelector:focus,
+#microphoneSelect:focus {
+ outline: none;
+ border-color: #007bff;
+ box-shadow: 0 0 0 3px rgba(0, 123, 255, 0.15);
+}
+
+label {
+ font-size: 13px;
+ color: var(--muted);
+}
+
+.ws-default {
+ font-size: 12px;
+ color: var(--muted);
+}
+
+/* Segmented pill control for Theme */
+.segmented {
+ display: inline-flex;
+ align-items: stretch;
+ border: 1px solid var(--button-border);
+ background-color: var(--button-bg);
+ border-radius: 999px;
+ overflow: hidden;
+}
+
+.segmented input[type="radio"] {
+ position: absolute;
+ opacity: 0;
+ pointer-events: none;
+}
+
+.theme-selector-container {
+ display: flex;
+ align-items: center;
+ margin-top: 17px;
+}
+
+.segmented label {
+ display: inline-flex;
+ align-items: center;
+ gap: 6px;
+ padding: 6px 12px;
+ font-size: 14px;
+ color: var(--muted);
+ cursor: pointer;
+ user-select: none;
+ transition: background-color 0.2s ease, color 0.2s ease;
+}
+
+.segmented label span {
+ display: none;
+}
+
+.segmented label:hover span {
+ display: inline;
+}
+
+.segmented label:hover {
+ background-color: var(--chip-bg);
+}
+
+.segmented img {
+ width: 16px;
+ height: 16px;
+}
+
+.segmented input[type="radio"]:checked + label {
+ background-color: var(--chip-bg);
+ color: var(--text);
+}
+
+.segmented input[type="radio"]:focus-visible + label,
+.segmented input[type="radio"]:focus + label {
+ outline: 2px solid #007bff;
+ outline-offset: 2px;
+ border-radius: 999px;
+}
+
+/* Transcript area */
+#linesTranscript {
+ margin: 20px auto;
+ max-width: 700px;
+ text-align: left;
+ font-size: 16px;
+}
+
+#linesTranscript p {
+ margin: 0px 0;
+}
+
+#linesTranscript strong {
+ color: var(--text);
+}
+
+#speaker {
+ border: 1px solid var(--border);
+ border-radius: 100px;
+ padding: 2px 10px;
+ font-size: 14px;
+ margin-bottom: 0px;
+}
+
+.label_diarization {
+ background-color: var(--chip-bg);
+ border-radius: 8px 8px 8px 8px;
+ padding: 2px 10px;
+ margin-left: 10px;
+ display: inline-block;
+ white-space: nowrap;
+ font-size: 14px;
+ margin-bottom: 0px;
+ color: var(--label-dia-text);
+}
+
+.label_transcription {
+ background-color: var(--chip-bg);
+ border-radius: 8px 8px 8px 8px;
+ padding: 2px 10px;
+ display: inline-block;
+ white-space: nowrap;
+ margin-left: 10px;
+ font-size: 14px;
+ margin-bottom: 0px;
+ color: var(--label-trans-text);
+}
+
+#timeInfo {
+ color: var(--muted);
+ margin-left: 10px;
+}
+
+.textcontent {
+ font-size: 16px;
+ padding-left: 10px;
+ margin-bottom: 10px;
+ margin-top: 1px;
+ padding-top: 5px;
+ border-radius: 0px 0px 0px 10px;
+}
+
+.buffer_diarization {
+ color: var(--label-dia-text);
+ margin-left: 4px;
+}
+
+.buffer_transcription {
+ color: #7474748c;
+ margin-left: 4px;
+}
+
+.spinner {
+ display: inline-block;
+ width: 8px;
+ height: 8px;
+ border: 2px solid var(--spinner-border);
+ border-top: 2px solid var(--spinner-top);
+ border-radius: 50%;
+ animation: spin 0.7s linear infinite;
+ vertical-align: middle;
+ margin-bottom: 2px;
+ margin-right: 5px;
+}
+
+@keyframes spin {
+ to {
+ transform: rotate(360deg);
+ }
+}
+
+.silence {
+ color: var(--muted);
+ background-color: var(--silence-bg);
+ font-size: 13px;
+ border-radius: 30px;
+ padding: 2px 10px;
+}
+
+.loading {
+ color: var(--muted);
+ background-color: var(--loading-bg);
+ border-radius: 8px 8px 8px 0px;
+ padding: 2px 10px;
+ font-size: 14px;
+ margin-bottom: 0px;
+}
+
+/* for smaller screens */
+@media (max-width: 768px) {
+ .settings-container {
+ flex-direction: column;
+ gap: 10px;
+ }
+
+ .settings {
+ justify-content: center;
+ gap: 8px;
+ }
+
+ .field {
+ align-items: center;
+ }
+
+ #websocketInput,
+ #microphoneSelect {
+ min-width: 200px;
+ max-width: 400px;
+ }
+
+ .theme-selector-container {
+ margin-top: 10px;
+ }
+}
+
+@media (max-width: 480px) {
+ body {
+ margin: 10px;
+ }
+
+ .settings {
+ flex-direction: column;
+ align-items: center;
+ gap: 6px;
+ }
+
+ #websocketInput,
+ #microphoneSelect {
+ max-width: 400px;
+ }
+
+ .segmented label {
+ padding: 4px 8px;
+ font-size: 12px;
+ }
+
+ .segmented img {
+ width: 14px;
+ height: 14px;
+ }
+}
+
+
+html
+{
+ width: 400px; /* max: 800px */
+ height: 600px; /* max: 600px */
+ border-radius: 10px;
+
+}
\ No newline at end of file
diff --git a/chrome-extension/web/live_transcription.js b/chrome-extension/web/live_transcription.js
new file mode 100644
index 0000000..5d1703c
--- /dev/null
+++ b/chrome-extension/web/live_transcription.js
@@ -0,0 +1,619 @@
/* Theme, WebSocket, recording, rendering logic extracted from inline script and adapted for segmented theme control and WS caption */

// --- Recording / connection state ---
let isRecording = false;      // true while MediaRecorder is active
let websocket = null;         // live WebSocket, or null when disconnected
let recorder = null;          // MediaRecorder streaming audio chunks to the server
let chunkDuration = 100;      // ms between MediaRecorder dataavailable events
let websocketUrl = "ws://localhost:8000/asr"; // default ASR endpoint
let userClosing = false;      // set when the stop was user-initiated (vs. server drop)
let wakeLock = null;          // screen wake lock held during recording
let startTime = null;         // Date.now() at recording start, drives the timer
let timerInterval = null;     // setInterval handle for updateTimer
let audioContext = null;      // AudioContext feeding the waveform analyser
let analyser = null;          // AnalyserNode sampled by drawWaveform
let microphone = null;        // MediaStreamSource wrapping the capture stream
let waveCanvas = document.getElementById("waveCanvas");
let waveCtx = waveCanvas.getContext("2d");
let animationFrame = null;    // requestAnimationFrame handle for drawWaveform
let waitingForStop = false;   // true between stop request and server "ready_to_stop"
let lastReceivedData = null;  // last transcription payload, re-rendered on close
let lastSignature = null;     // render de-dup key (see renderLinesWithBuffer)
let availableMicrophones = []; // 'audioinput' devices from enumerateDevices()
let selectedMicrophoneId = null; // chosen deviceId, or null for the default mic

// Scale the waveform canvas backing store for HiDPI displays
// (the CSS size stays 60x30; drawing code divides by devicePixelRatio).
waveCanvas.width = 60 * (window.devicePixelRatio || 1);
waveCanvas.height = 30 * (window.devicePixelRatio || 1);
waveCtx.scale(window.devicePixelRatio || 1, window.devicePixelRatio || 1);

// --- DOM references (some are optional and guarded at use sites) ---
const statusText = document.getElementById("status");
const recordButton = document.getElementById("recordButton");
const chunkSelector = document.getElementById("chunkSelector");
const websocketInput = document.getElementById("websocketInput");
const websocketDefaultSpan = document.getElementById("wsDefaultUrl");
const linesTranscriptDiv = document.getElementById("linesTranscript");
const timerElement = document.querySelector(".timer");
const themeRadios = document.querySelectorAll('input[name="theme"]');
const microphoneSelect = document.getElementById("microphoneSelect");
+
// Read the CSS custom property that sets the waveform stroke color.
function getWaveStroke() {
  const styles = getComputedStyle(document.documentElement);
  const v = styles.getPropertyValue("--wave-stroke").trim();
  return v || "#000"; // fallback when the variable is unset
}

// Cached stroke color; refreshed on theme changes to avoid a
// getComputedStyle call on every animation frame.
let waveStroke = getWaveStroke();
function updateWaveStroke() {
  waveStroke = getWaveStroke();
}
+
/**
 * Apply a theme preference to the document root.
 * "light" / "dark" pin the theme via the data-theme attribute; any other
 * value (i.e. "system") removes the attribute so CSS media queries decide.
 */
function applyTheme(pref) {
  const isPinned = pref === "light" || pref === "dark";
  if (isPinned) {
    document.documentElement.setAttribute("data-theme", pref);
  } else {
    document.documentElement.removeAttribute("data-theme");
  }
  updateWaveStroke(); // waveform stroke color is theme-dependent
}
+
// Persisted theme preference: restore from localStorage (default "system")
// and keep the segmented radio control in sync.
const savedThemePref = localStorage.getItem("themePreference") || "system";
applyTheme(savedThemePref);
if (themeRadios.length) {
  themeRadios.forEach((r) => {
    r.checked = r.value === savedThemePref;
    r.addEventListener("change", () => {
      if (r.checked) {
        localStorage.setItem("themePreference", r.value);
        applyTheme(r.value);
      }
    });
  });
}

// React to OS theme changes when in "system" mode: only the cached wave
// stroke needs refreshing — the page colors follow CSS media queries.
const darkMq = window.matchMedia && window.matchMedia("(prefers-color-scheme: dark)");
const handleOsThemeChange = () => {
  const pref = localStorage.getItem("themePreference") || "system";
  if (pref === "system") updateWaveStroke();
};
if (darkMq && darkMq.addEventListener) {
  darkMq.addEventListener("change", handleOsThemeChange);
} else if (darkMq && darkMq.addListener) {
  // deprecated, but included for Safari compatibility
  darkMq.addListener(handleOsThemeChange);
}
+
/**
 * Request microphone permission, enumerate audio input devices, and
 * populate the microphone <select>. On failure, logs the error and shows
 * a permission hint in the status line.
 *
 * Fix: the permissions.query result was bound to an unused local
 * (`micPermission`); the call is kept for its permission-state side effect
 * but the dead binding is removed.
 */
async function enumerateMicrophones() {
  try {
    // Query permission state (result intentionally unused; may throw on
    // browsers that don't recognize the "microphone" permission name,
    // which lands in the catch below like any other access failure).
    await navigator.permissions.query({
      name: "microphone",
    });

    // A short-lived getUserMedia call unlocks device labels in
    // enumerateDevices(); stop the tracks immediately.
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    stream.getTracks().forEach(track => track.stop());

    const devices = await navigator.mediaDevices.enumerateDevices();
    availableMicrophones = devices.filter(device => device.kind === 'audioinput');

    populateMicrophoneSelect();
    console.log(`Found ${availableMicrophones.length} microphone(s)`);
  } catch (error) {
    console.error('Error enumerating microphones:', error);
    statusText.textContent = "Error accessing microphones. Please grant permission.";
  }
}
+
// Rebuild the microphone <select> from availableMicrophones and restore
// the previously chosen device (if it is still present).
function populateMicrophoneSelect() {
  if (!microphoneSelect) return; // selector is optional in the markup

  microphoneSelect.innerHTML = '';

  availableMicrophones.forEach((device, index) => {
    const option = document.createElement('option');
    option.value = device.deviceId;
    // Labels are empty until mic permission is granted; fall back to an index.
    option.textContent = device.label || `Microphone ${index + 1}`;
    microphoneSelect.appendChild(option);
  });

  // Restore persisted choice only if that device is still attached.
  const savedMicId = localStorage.getItem('selectedMicrophone');
  if (savedMicId && availableMicrophones.some(mic => mic.deviceId === savedMicId)) {
    microphoneSelect.value = savedMicId;
    selectedMicrophoneId = savedMicId;
  }
}
+
// Persist the newly selected microphone and, if currently recording,
// restart the capture pipeline so the new device takes effect.
function handleMicrophoneChange() {
  selectedMicrophoneId = microphoneSelect.value || null;
  localStorage.setItem('selectedMicrophone', selectedMicrophoneId || '');

  const selectedDevice = availableMicrophones.find(mic => mic.deviceId === selectedMicrophoneId);
  const deviceName = selectedDevice ? selectedDevice.label : 'Default Microphone';

  console.log(`Selected microphone: ${deviceName}`);
  statusText.textContent = `Microphone changed to: ${deviceName}`;

  if (isRecording) {
    statusText.textContent = "Switching microphone... Please wait.";
    // Stop, then restart after a delay so the server-side finalization and
    // WebSocket close have time to settle before reconnecting.
    stopRecording().then(() => {
      setTimeout(() => {
        toggleRecording();
      }, 1000);
    });
  }
}
+
/**
 * Format a value to one decimal place for lag/progress labels.
 * Anything that does not coerce to a finite number is returned unchanged.
 */
function fmt1(x) {
  const asNumber = Number(x);
  if (!Number.isFinite(asNumber)) {
    return x;
  }
  return asNumber.toFixed(1);
}
+
// Default WebSocket URL computation.
// NOTE(review): host/port/protocol are computed but never used — the default
// stays the hard-coded ws://localhost:8000/asr (an extension page has no
// meaningful page origin to derive from). Presumably kept from the web app
// version; confirm before removing.
const host = window.location.hostname || "localhost";
const port = window.location.port;
const protocol = window.location.protocol === "https:" ? "wss" : "ws";
const defaultWebSocketUrl = websocketUrl;

// Populate default caption and input
if (websocketDefaultSpan) websocketDefaultSpan.textContent = defaultWebSocketUrl;
websocketInput.value = defaultWebSocketUrl;
websocketUrl = defaultWebSocketUrl;

// Optional chunk selector (guard for presence)
if (chunkSelector) {
  chunkSelector.addEventListener("change", () => {
    chunkDuration = parseInt(chunkSelector.value);
  });
}
+
// WebSocket input change handling: validate the scheme before accepting the
// URL; the new URL is used on the next connection attempt.
websocketInput.addEventListener("change", () => {
  const urlValue = websocketInput.value.trim();
  if (!urlValue.startsWith("ws://") && !urlValue.startsWith("wss://")) {
    statusText.textContent = "Invalid WebSocket URL (must start with ws:// or wss://)";
    return;
  }
  websocketUrl = urlValue;
  statusText.textContent = "WebSocket URL updated. Ready to connect.";
});
+
/**
 * Open the WebSocket to the ASR server and install its lifecycle handlers.
 * Resolves once the connection is open; rejects on construction or
 * connection error. Incoming messages are JSON transcription payloads,
 * rendered via renderLinesWithBuffer; a {"type":"ready_to_stop"} message
 * signals the server has finished flushing after a stop request.
 */
function setupWebSocket() {
  return new Promise((resolve, reject) => {
    try {
      websocket = new WebSocket(websocketUrl);
    } catch (error) {
      // new WebSocket() throws synchronously on a malformed URL.
      statusText.textContent = "Invalid WebSocket URL. Please check and try again.";
      reject(error);
      return;
    }

    websocket.onopen = () => {
      statusText.textContent = "Connected to server.";
      resolve();
    };

    websocket.onclose = () => {
      if (userClosing) {
        // User-initiated close: if we were still waiting for the server's
        // final flush, render the last payload as the finalized transcript.
        if (waitingForStop) {
          statusText.textContent = "Processing finalized or connection closed.";
          if (lastReceivedData) {
            renderLinesWithBuffer(
              lastReceivedData.lines || [],
              lastReceivedData.buffer_diarization || "",
              lastReceivedData.buffer_transcription || "",
              0,
              0,
              true
            );
          }
        }
      } else {
        // Unexpected disconnect (server down, model still loading, ...).
        statusText.textContent = "Disconnected from the WebSocket server. (Check logs if model is loading.)";
        if (isRecording) {
          stopRecording();
        }
      }
      // Reset the session state machine regardless of who closed.
      isRecording = false;
      waitingForStop = false;
      userClosing = false;
      lastReceivedData = null;
      websocket = null;
      updateUI();
    };

    websocket.onerror = () => {
      statusText.textContent = "Error connecting to WebSocket.";
      reject(new Error("Error connecting to WebSocket"));
    };

    websocket.onmessage = (event) => {
      const data = JSON.parse(event.data);

      if (data.type === "ready_to_stop") {
        // Server finished processing the final audio; render the last
        // payload as finalized and close the socket ourselves.
        console.log("Ready to stop received, finalizing display and closing WebSocket.");
        waitingForStop = false;

        if (lastReceivedData) {
          renderLinesWithBuffer(
            lastReceivedData.lines || [],
            lastReceivedData.buffer_diarization || "",
            lastReceivedData.buffer_transcription || "",
            0,
            0,
            true
          );
        }
        statusText.textContent = "Finished processing audio! Ready to record again.";
        recordButton.disabled = false;

        if (websocket) {
          websocket.close();
        }
        return;
      }

      // Regular transcription update: remember it (for finalization on
      // close) and render it live.
      lastReceivedData = data;

      const {
        lines = [],
        buffer_transcription = "",
        buffer_diarization = "",
        remaining_time_transcription = 0,
        remaining_time_diarization = 0,
        status = "active_transcription",
      } = data;

      renderLinesWithBuffer(
        lines,
        buffer_diarization,
        buffer_transcription,
        remaining_time_diarization,
        remaining_time_transcription,
        false,
        status
      );
    };
  });
}
+
/**
 * Render transcription lines plus the pending transcription/diarization
 * buffers into #linesTranscript.
 *
 * Fix: the original contained a string literal broken across two lines
 * ("No audio detected...") — a JavaScript syntax error left behind when the
 * HTML markup was stripped out of the literals during extraction. The
 * markup below is reconstructed from the CSS classes this extension ships
 * (.silence, .loading, .spinner, .buffer_diarization, .buffer_transcription)
 * and the selectors the fast-path below queries (.lag-transcription-value,
 * .lag-diarization-value, .loading-diarization-value).
 * NOTE(review): confirm the exact markup against the upstream WhisperLiveKit
 * web page before shipping.
 *
 * speaker conventions (from the branches below): -2 = silence segment,
 * 0 = diarization pending, otherwise a 1-based speaker id.
 */
function renderLinesWithBuffer(
  lines,
  buffer_diarization,
  buffer_transcription,
  remaining_time_diarization,
  remaining_time_transcription,
  isFinalizing = false,
  current_status = "active_transcription"
) {
  if (current_status === "no_audio_detected") {
    linesTranscriptDiv.innerHTML =
      '<p style="text-align: center"><span class="silence">No audio detected...</span></p>';
    return;
  }

  const showLoading = !isFinalizing && (lines || []).some((it) => it.speaker == 0);
  const showTransLag = !isFinalizing && remaining_time_transcription > 0;
  const showDiaLag = !isFinalizing && !!buffer_diarization && remaining_time_diarization > 0;

  // Render de-duplication: when nothing visible changed except the lag
  // timers, patch the timer values in place instead of rebuilding the DOM.
  const signature = JSON.stringify({
    lines: (lines || []).map((it) => ({ speaker: it.speaker, text: it.text, beg: it.beg, end: it.end })),
    buffer_transcription: buffer_transcription || "",
    buffer_diarization: buffer_diarization || "",
    status: current_status,
    showLoading,
    showTransLag,
    showDiaLag,
    isFinalizing: !!isFinalizing,
  });
  if (lastSignature === signature) {
    const t = document.querySelector(".lag-transcription-value");
    if (t) t.textContent = fmt1(remaining_time_transcription);
    const d = document.querySelector(".lag-diarization-value");
    if (d) d.textContent = fmt1(remaining_time_diarization);
    const ld = document.querySelector(".loading-diarization-value");
    if (ld) ld.textContent = fmt1(remaining_time_diarization);
    return;
  }
  lastSignature = signature;

  const linesHtml = (lines || [])
    .map((item, idx) => {
      let timeInfo = "";
      if (item.beg !== undefined && item.end !== undefined) {
        timeInfo = ` ${item.beg} - ${item.end}`;
      }

      let speakerLabel = "";
      if (item.speaker === -2) {
        speakerLabel = `<span class="silence">Silence${timeInfo}</span>`;
      } else if (item.speaker == 0 && !isFinalizing) {
        // Diarization has not yet assigned a speaker to this line.
        speakerLabel = `<span class="loading"><span class="spinner"></span><span class="loading-diarization-value">${fmt1(
          remaining_time_diarization
        )}</span> second(s) of audio are undergoing diarization</span>`;
      } else if (item.speaker !== 0) {
        speakerLabel = `<span class="speaker-label">Speaker ${item.speaker}${timeInfo}</span>`;
      }

      let currentLineText = item.text || "";

      // Lag badges and live buffers are only attached to the newest line.
      if (idx === lines.length - 1) {
        if (!isFinalizing && item.speaker !== -2) {
          if (remaining_time_transcription > 0) {
            speakerLabel += ` <span class="lag-label">Lag <span class="lag-transcription-value">${fmt1(
              remaining_time_transcription
            )}</span>s</span>`;
          }
          if (buffer_diarization && remaining_time_diarization > 0) {
            speakerLabel += ` <span class="lag-label">Lag <span class="lag-diarization-value">${fmt1(
              remaining_time_diarization
            )}</span>s</span>`;
          }
        }

        if (buffer_diarization) {
          if (isFinalizing) {
            // On finalization the buffer becomes plain confirmed text.
            currentLineText +=
              (currentLineText.length > 0 && buffer_diarization.trim().length > 0 ? " " : "") +
              buffer_diarization.trim();
          } else {
            currentLineText += `<span class="buffer_diarization">${buffer_diarization}</span>`;
          }
        }
        if (buffer_transcription) {
          if (isFinalizing) {
            currentLineText +=
              (currentLineText.length > 0 && buffer_transcription.trim().length > 0 ? " " : "") +
              buffer_transcription.trim();
          } else {
            currentLineText += `<span class="buffer_transcription">${buffer_transcription}</span>`;
          }
        }
      }

      return currentLineText.trim().length > 0 || speakerLabel.length > 0
        ? `<p>${speakerLabel}<br/>${currentLineText}</p>`
        : `<p>${speakerLabel}<br/></p>`;
    })
    .join("");

  linesTranscriptDiv.innerHTML = linesHtml;
  window.scrollTo({ top: document.body.scrollHeight, behavior: "smooth" });
}
+
/** Refresh the MM:SS elapsed-time display; no-op until recording starts. */
function updateTimer() {
  if (!startTime) return;

  const totalSeconds = Math.floor((Date.now() - startTime) / 1000);
  const mm = String(Math.floor(totalSeconds / 60)).padStart(2, "0");
  const ss = String(totalSeconds % 60).padStart(2, "0");
  timerElement.textContent = `${mm}:${ss}`;
}
+
/**
 * Draw one frame of the live audio waveform onto the canvas and schedule
 * the next frame. No-op once the analyser has been torn down.
 * All coordinates divide by devicePixelRatio because the canvas backing
 * store is DPR-scaled at startup while waveCtx carries a matching scale().
 */
function drawWaveform() {
  if (!analyser) return;

  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);
  analyser.getByteTimeDomainData(dataArray);

  waveCtx.clearRect(
    0,
    0,
    waveCanvas.width / (window.devicePixelRatio || 1),
    waveCanvas.height / (window.devicePixelRatio || 1)
  );
  waveCtx.lineWidth = 1;
  waveCtx.strokeStyle = waveStroke; // theme-aware cached color
  waveCtx.beginPath();

  const sliceWidth = (waveCanvas.width / (window.devicePixelRatio || 1)) / bufferLength;
  let x = 0;

  for (let i = 0; i < bufferLength; i++) {
    // Samples are bytes centered at 128; v is ~1.0 at silence.
    const v = dataArray[i] / 128.0;
    const y = (v * (waveCanvas.height / (window.devicePixelRatio || 1))) / 2;

    if (i === 0) {
      waveCtx.moveTo(x, y);
    } else {
      waveCtx.lineTo(x, y);
    }

    x += sliceWidth;
  }

  // Close the trace at the vertical midpoint on the right edge.
  waveCtx.lineTo(
    waveCanvas.width / (window.devicePixelRatio || 1),
    (waveCanvas.height / (window.devicePixelRatio || 1)) / 2
  );
  waveCtx.stroke();

  animationFrame = requestAnimationFrame(drawWaveform);
}
+
/**
 * Start audio capture and stream chunks to the open WebSocket.
 * Prefers Chrome tab-capture (extension context); falls back to the
 * selected microphone. Also acquires a screen wake lock, wires the
 * waveform analyser, and starts the elapsed-time timer.
 * Assumes setupWebSocket() has already connected (see toggleRecording).
 */
async function startRecording() {
  try {
    // Best effort: keep the screen awake during recording.
    try {
      wakeLock = await navigator.wakeLock.request("screen");
    } catch (err) {
      console.log("Error acquiring wake lock.");
    }

    let stream;
    try {
      // Try tab capture first
      stream = await new Promise((resolve, reject) => {
        chrome.tabCapture.capture({audio: true}, (s) => {
          if (s) {
            resolve(s);
          } else {
            reject(new Error('Tab capture failed or not available'));
          }
        });
      });
      statusText.textContent = "Using tab audio capture.";
    } catch (tabError) {
      console.log('Tab capture not available, falling back to microphone', tabError);
      // Fallback to microphone
      const audioConstraints = selectedMicrophoneId
        ? { audio: { deviceId: { exact: selectedMicrophoneId } } }
        : { audio: true };
      stream = await navigator.mediaDevices.getUserMedia(audioConstraints);
      statusText.textContent = "Using microphone audio.";
    }

    // Waveform visualization path: stream -> source -> analyser.
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    analyser = audioContext.createAnalyser();
    analyser.fftSize = 256;
    microphone = audioContext.createMediaStreamSource(stream);
    microphone.connect(analyser);

    // Streaming path: MediaRecorder emits webm chunks every chunkDuration ms.
    recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
    recorder.ondataavailable = (e) => {
      if (websocket && websocket.readyState === WebSocket.OPEN) {
        websocket.send(e.data);
      }
    };
    recorder.start(chunkDuration);

    startTime = Date.now();
    timerInterval = setInterval(updateTimer, 1000);
    drawWaveform();

    isRecording = true;
    updateUI();
  } catch (err) {
    // Browsers treat 0.0.0.0 as an insecure origin and block media access.
    if (window.location.hostname === "0.0.0.0") {
      statusText.textContent =
        "Error accessing audio input. Browsers may block audio access on 0.0.0.0. Try using localhost:8000 instead.";
    } else {
      statusText.textContent = "Error accessing audio input. Please check permissions.";
    }
    console.error(err);
  }
}
+
/**
 * Stop capture and tear down the audio pipeline.
 * Sends an empty blob as the end-of-stream marker so the server finalizes;
 * the WebSocket itself stays open until the server replies "ready_to_stop"
 * (handled in setupWebSocket's onmessage).
 */
async function stopRecording() {
  // Release the wake lock first; failures are non-fatal.
  if (wakeLock) {
    try {
      await wakeLock.release();
    } catch (e) {
      // ignore
    }
    wakeLock = null;
  }

  // Mark this as a user-initiated stop so onclose renders the final state.
  userClosing = true;
  waitingForStop = true;

  if (websocket && websocket.readyState === WebSocket.OPEN) {
    // Empty blob = end-of-audio signal for the server.
    const emptyBlob = new Blob([], { type: "audio/webm" });
    websocket.send(emptyBlob);
    statusText.textContent = "Recording stopped. Processing final audio...";
  }

  if (recorder) {
    recorder.stop();
    recorder = null;
  }

  if (microphone) {
    microphone.disconnect();
    microphone = null;
  }

  if (analyser) {
    analyser = null; // also stops drawWaveform's early-return path
  }

  if (audioContext && audioContext.state !== "closed") {
    try {
      await audioContext.close();
    } catch (e) {
      console.warn("Could not close audio context:", e);
    }
    audioContext = null;
  }

  if (animationFrame) {
    cancelAnimationFrame(animationFrame);
    animationFrame = null;
  }

  if (timerInterval) {
    clearInterval(timerInterval);
    timerInterval = null;
  }
  timerElement.textContent = "00:00";
  startTime = null;

  isRecording = false;
  updateUI();
}
+
/**
 * Toggle between recording and stopped states.
 * Reuses an already-open WebSocket when available, otherwise connects
 * first; refuses to start while the previous session is still finalizing.
 */
async function toggleRecording() {
  if (isRecording) {
    console.log("Stopping recording");
    stopRecording();
    return;
  }

  if (waitingForStop) {
    console.log("Waiting for stop, early return");
    return;
  }

  console.log("Connecting to WebSocket");
  try {
    const socketIsOpen = websocket && websocket.readyState === WebSocket.OPEN;
    if (!socketIsOpen) {
      await setupWebSocket();
    }
    await startRecording();
  } catch (err) {
    statusText.textContent = "Could not connect to WebSocket or access mic. Aborted.";
    console.error(err);
  }
}
+
/**
 * Sync the record button and status text with the state machine, in
 * priority order: waitingForStop > isRecording > idle. The button is
 * disabled only while the server finalizes the previous session.
 */
function updateUI() {
  recordButton.classList.toggle("recording", isRecording);
  recordButton.disabled = waitingForStop;

  if (waitingForStop) {
    // Keep the more specific "processing final audio" message if present.
    if (statusText.textContent !== "Recording stopped. Processing final audio...") {
      statusText.textContent = "Please wait for processing to complete...";
    }
    return;
  }

  if (isRecording) {
    statusText.textContent = "Recording...";
    return;
  }

  // Idle: keep terminal messages from the last session, otherwise prompt.
  const terminalMessages = [
    "Finished processing audio! Ready to record again.",
    "Processing finalized or connection closed.",
  ];
  if (!terminalMessages.includes(statusText.textContent)) {
    statusText.textContent = "Click to start transcription";
  }
}
+
// --- UI event wiring ---
recordButton.addEventListener("click", toggleRecording);

if (microphoneSelect) {
  microphoneSelect.addEventListener("change", handleMicrophoneChange);
}
// NOTE(review): automatic microphone enumeration on load and on device
// change is intentionally left disabled — presumably to avoid triggering a
// permission prompt the moment the popup opens. Confirm before re-enabling.
// document.addEventListener('DOMContentLoaded', async () => {
//   try {
//     await enumerateMicrophones();
//   } catch (error) {
//     console.log("Could not enumerate microphones on load:", error);
//   }
// });
// navigator.mediaDevices.addEventListener('devicechange', async () => {
//   console.log('Device change detected, re-enumerating microphones');
//   try {
//     await enumerateMicrophones();
//   } catch (error) {
//     console.log("Error re-enumerating microphones:", error);
//   }
// });
diff --git a/chrome-extension/web/src/dark_mode.svg b/chrome-extension/web/src/dark_mode.svg
new file mode 100644
index 0000000..a083e1a
--- /dev/null
+++ b/chrome-extension/web/src/dark_mode.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/chrome-extension/web/src/light_mode.svg b/chrome-extension/web/src/light_mode.svg
new file mode 100644
index 0000000..66b6e74
--- /dev/null
+++ b/chrome-extension/web/src/light_mode.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/chrome-extension/web/src/system_mode.svg b/chrome-extension/web/src/system_mode.svg
new file mode 100644
index 0000000..7a8a0d2
--- /dev/null
+++ b/chrome-extension/web/src/system_mode.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/whisperlivekit/simul_whisper/simul_whisper.py b/whisperlivekit/simul_whisper/simul_whisper.py
index 0b8649e..c1f8c2e 100644
--- a/whisperlivekit/simul_whisper/simul_whisper.py
+++ b/whisperlivekit/simul_whisper/simul_whisper.py
@@ -399,17 +399,17 @@ class PaddedAlignAttWhisper:
mlx_mel_padded = mlx_log_mel_spectrogram(audio=input_segments.detach(), n_mels=self.model.dims.n_mels, padding=N_SAMPLES)
mlx_mel = mlx_pad_or_trim(mlx_mel_padded, N_FRAMES, axis=-2)
mlx_encoder_feature = self.mlx_encoder.encoder(mlx_mel[None])
- encoder_feature = torch.as_tensor(mlx_encoder_feature)
+ encoder_feature = torch.tensor(np.array(mlx_encoder_feature))
content_mel_len = int((mlx_mel_padded.shape[0] - mlx_mel.shape[0])/2)
- device = encoder_feature.device #'cpu' is apple silicon
+ device = 'cpu'
elif self.fw_encoder:
audio_length_seconds = len(input_segments) / 16000
content_mel_len = int(audio_length_seconds * 100)//2
mel_padded_2 = self.fw_feature_extractor(waveform=input_segments.numpy(), padding=N_SAMPLES)[None, :]
mel = fw_pad_or_trim(mel_padded_2, N_FRAMES, axis=-1)
encoder_feature_ctranslate = self.fw_encoder.encode(mel)
- encoder_feature = torch.as_tensor(encoder_feature_ctranslate)
- device = encoder_feature.device
+ encoder_feature = torch.Tensor(np.array(encoder_feature_ctranslate))
+ device = 'cpu'
else:
# mel + padding to 30s
mel_padded = log_mel_spectrogram(input_segments, n_mels=self.model.dims.n_mels, padding=N_SAMPLES,