v0.1.0 chrome extension

2026-03-07 22:33:36 +00:00 · 2025-09-03 22:25:00 +02:00
parent 953697cd86
commit e0a5cbf0e7
15 changed files with 1820 additions and 4 deletions
--- a/chrome-extension/README.md
+++ b/chrome-extension/README.md
@@ -0,0 +1,13 @@
+## WhisperLiveKit Chrome Extension v0.1.0
+Capture the audio of your current tab and transcribe or translate it using WhisperLiveKit. **Still unstable**
+
+<img src="https://raw.githubusercontent.com/QuentinFuxa/WhisperLiveKit/refs/heads/main/chrome-extension/demo-extension.png" alt="WhisperLiveKit Demo" width="730">
+
+## Running this extension
+1. Clone this repository.
+2. Load this directory in Chrome as an unpacked extension.
+
+
+## Devs:
+- Unfortunately, it is impossible to capture audio from tabs when the extension runs as a side panel: https://issues.chromium.org/issues/40926394
+- To capture microphone in an extension, there are tricks: https://github.com/justinmann/sidepanel-audio-issue , https://medium.com/@lynchee.owo/how-to-enable-microphone-access-in-chrome-extensions-by-code-924295170080 (comments)
--- a/chrome-extension/demo-extension.png
+++ b/chrome-extension/demo-extension.png
--- a/chrome-extension/example_tab_capture.js
+++ b/chrome-extension/example_tab_capture.js
@@ -0,0 +1,315 @@
+const extend = function() { //helper function to merge objects
+  let target = arguments[0],
+      sources = [].slice.call(arguments, 1);
+  for (let i = 0; i < sources.length; ++i) {
+    let src = sources[i];
+    for (key in src) {
+      let val = src[key];
+      target[key] = typeof val === "object"
+        ? extend(typeof target[key] === "object" ? target[key] : {}, val)
+        : val;
+    }
+  }
+  return target;
+};
+
+const WORKER_FILE = {
+  wav: "WavWorker.js",
+  mp3: "Mp3Worker.js"
+};
+
+// default configs
+const CONFIGS = {
+  workerDir: "/workers/",     // worker scripts dir (end with /)
+  numChannels: 2,     // number of channels
+  encoding: "wav",    // encoding (can be changed at runtime)
+
+  // runtime options
+  options: {
+    timeLimit: 1200,           // recording time limit (sec)
+    encodeAfterRecord: true, // process encoding after recording
+    progressInterval: 1000,   // encoding progress report interval (millisec)
+    bufferSize: undefined,    // buffer size (use browser default)
+
+    // encoding-specific options
+    wav: {
+      mimeType: "audio/wav"
+    },
+    mp3: {
+      mimeType: "audio/mpeg",
+      bitRate: 192            // (CBR only): bit rate = [64 .. 320]
+    }
+  }
+};
+
+class Recorder {
+
+  constructor(source, configs) { //creates audio context from the source and connects it to the worker
+    extend(this, CONFIGS, configs || {});
+    this.context = source.context;
+    if (this.context.createScriptProcessor == null)
+      this.context.createScriptProcessor = this.context.createJavaScriptNode;
+    this.input = this.context.createGain();
+    source.connect(this.input);
+    this.buffer = [];
+    this.initWorker();
+  }
+
+  isRecording() {
+    return this.processor != null;
+  }
+
+  setEncoding(encoding) {
+    if(!this.isRecording() && this.encoding !== encoding) {
+        this.encoding = encoding;
+        this.initWorker();
+    }
+  }
+
+  setOptions(options) {
+    if (!this.isRecording()) {
+      extend(this.options, options);
+      this.worker.postMessage({ command: "options", options: this.options});
+    }
+  }
+
+  startRecording() {
+    if(!this.isRecording()) {
+      let numChannels = this.numChannels;
+      let buffer = this.buffer;
+      let worker = this.worker;
+      this.processor = this.context.createScriptProcessor(
+        this.options.bufferSize,
+        this.numChannels, this.numChannels);
+      this.input.connect(this.processor);
+      this.processor.connect(this.context.destination);
+      this.processor.onaudioprocess = function(event) {
+        for (var ch = 0; ch < numChannels; ++ch)
+          buffer[ch] = event.inputBuffer.getChannelData(ch);
+        worker.postMessage({ command: "record", buffer: buffer });
+      };
+      this.worker.postMessage({
+        command: "start",
+        bufferSize: this.processor.bufferSize
+      });
+      this.startTime = Date.now();
+    }
+  }
+
+  cancelRecording() {
+    if(this.isRecording()) {
+      this.input.disconnect();
+      this.processor.disconnect();
+      delete this.processor;
+      this.worker.postMessage({ command: "cancel" });
+    }
+  }
+
+  finishRecording() {
+    if (this.isRecording()) {
+      this.input.disconnect();
+      this.processor.disconnect();
+      delete this.processor;
+      this.worker.postMessage({ command: "finish" });
+    }
+  }
+
+  cancelEncoding() {
+    if (this.options.encodeAfterRecord)
+      if (!this.isRecording()) {
+        this.onEncodingCanceled(this);
+        this.initWorker();
+      }
+  }
+
+  initWorker() {
+    if (this.worker != null)
+      this.worker.terminate();
+    this.onEncoderLoading(this, this.encoding);
+    this.worker = new Worker(this.workerDir + WORKER_FILE[this.encoding]);
+    let _this = this;
+    this.worker.onmessage = function(event) {
+      let data = event.data;
+      switch (data.command) {
+        case "loaded":
+          _this.onEncoderLoaded(_this, _this.encoding);
+          break;
+        case "timeout":
+          _this.onTimeout(_this);
+          break;
+        case "progress":
+          _this.onEncodingProgress(_this, data.progress);
+          break;
+        case "complete":
+          _this.onComplete(_this, data.blob);
+      }
+    }
+    this.worker.postMessage({
+      command: "init",
+      config: {
+        sampleRate: this.context.sampleRate,
+        numChannels: this.numChannels
+      },
+      options: this.options
+    });
+  }
+
+  onEncoderLoading(recorder, encoding) {}
+  onEncoderLoaded(recorder, encoding) {}
+  onTimeout(recorder) {}
+  onEncodingProgress(recorder, progress) {}
+  onEncodingCanceled(recorder) {}
+  onComplete(recorder, blob) {}
+
+}
+
+const audioCapture = (timeLimit, muteTab, format, quality, limitRemoved) => {
+  chrome.tabCapture.capture({audio: true}, (stream) => { // sets up stream for capture
+    let startTabId; //tab when the capture is started
+    let timeout;
+    let completeTabID; //tab when the capture is stopped
+    let audioURL = null; //resulting object when encoding is completed
+    chrome.tabs.query({active:true, currentWindow: true}, (tabs) => startTabId = tabs[0].id) //saves start tab
+    const liveStream = stream;
+    const audioCtx = new AudioContext();
+    const source = audioCtx.createMediaStreamSource(stream);
+    let mediaRecorder = new Recorder(source); //initiates the recorder based on the current stream
+    mediaRecorder.setEncoding(format); //sets encoding based on options
+    if(limitRemoved) { //removes time limit
+      mediaRecorder.setOptions({timeLimit: 10800});
+    } else {
+      mediaRecorder.setOptions({timeLimit: timeLimit/1000});
+    }
+    if(format === "mp3") {
+      mediaRecorder.setOptions({mp3: {bitRate: quality}});
+    }
+    mediaRecorder.startRecording();
+
+    function onStopCommand(command) { //keypress
+      if (command === "stop") {
+        stopCapture();
+      }
+    }
+    function onStopClick(request) { //click on popup
+      if(request === "stopCapture") {
+        stopCapture();
+      } else if (request === "cancelCapture") {
+        cancelCapture();
+      } else if (request.cancelEncodeID) {
+        if(request.cancelEncodeID === startTabId && mediaRecorder) {
+          mediaRecorder.cancelEncoding();
+        }
+      }
+    }
+    chrome.commands.onCommand.addListener(onStopCommand);
+    chrome.runtime.onMessage.addListener(onStopClick);
+    mediaRecorder.onComplete = (recorder, blob) => {
+      audioURL = window.URL.createObjectURL(blob);
+      if(completeTabID) {
+        chrome.tabs.sendMessage(completeTabID, {type: "encodingComplete", audioURL});
+      }
+      mediaRecorder = null;
+    }
+    mediaRecorder.onEncodingProgress = (recorder, progress) => {
+      if(completeTabID) {
+        chrome.tabs.sendMessage(completeTabID, {type: "encodingProgress", progress: progress});
+      }
+    }
+
+    const stopCapture = function() {
+      let endTabId;
+      //check to make sure the current tab is the tab being captured
+      chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
+        endTabId = tabs[0].id;
+        if(mediaRecorder && startTabId === endTabId){
+          mediaRecorder.finishRecording();
+          chrome.tabs.create({url: "complete.html"}, (tab) => {
+            completeTabID = tab.id;
+            let completeCallback = () => {
+              chrome.tabs.sendMessage(tab.id, {type: "createTab", format: format, audioURL, startID: startTabId});
+            }
+            setTimeout(completeCallback, 500);
+          });
+          closeStream(endTabId);
+        }
+      })
+    }
+
+    const cancelCapture = function() {
+      let endTabId;
+      chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
+        endTabId = tabs[0].id;
+        if(mediaRecorder && startTabId === endTabId){
+          mediaRecorder.cancelRecording();
+          closeStream(endTabId);
+        }
+      })
+    }
+
+//removes the audio context and closes recorder to save memory
+    const closeStream = function(endTabId) {
+      chrome.commands.onCommand.removeListener(onStopCommand);
+      chrome.runtime.onMessage.removeListener(onStopClick);
+      mediaRecorder.onTimeout = () => {};
+      audioCtx.close();
+      liveStream.getAudioTracks()[0].stop();
+      sessionStorage.removeItem(endTabId);
+      chrome.runtime.sendMessage({captureStopped: endTabId});
+    }
+
+    mediaRecorder.onTimeout = stopCapture;
+
+    if(!muteTab) {
+      let audio = new Audio();
+      audio.srcObject = liveStream;
+      audio.play();
+    }
+  });
+}
+
+
+
+//sends reponses to and from the popup menu
+chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
+  if (request.currentTab && sessionStorage.getItem(request.currentTab)) {
+    sendResponse(sessionStorage.getItem(request.currentTab));
+  } else if (request.currentTab){
+    sendResponse(false);
+  } else if (request === "startCapture") {
+    startCapture();
+  }
+});
+
+const startCapture = function() {
+  chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
+    // CODE TO BLOCK CAPTURE ON YOUTUBE, DO NOT REMOVE
+    // if(tabs[0].url.toLowerCase().includes("youtube")) {
+    //   chrome.tabs.create({url: "error.html"});
+    // } else {
+      if(!sessionStorage.getItem(tabs[0].id)) {
+        sessionStorage.setItem(tabs[0].id, Date.now());
+        chrome.storage.sync.get({
+          maxTime: 1200000,
+          muteTab: false,
+          format: "mp3",
+          quality: 192,
+          limitRemoved: false
+        }, (options) => {
+          let time = options.maxTime;
+          if(time > 1200000) {
+            time = 1200000
+          }
+          audioCapture(time, options.muteTab, options.format, options.quality, options.limitRemoved);
+        });
+        chrome.runtime.sendMessage({captureStarted: tabs[0].id, startTime: Date.now()});
+      }
+    // }
+  });
+};
+
+
+chrome.commands.onCommand.addListener((command) => {
+  if (command === "start") {
+    startCapture();
+  }
+});
--- a/chrome-extension/manifest.json
+++ b/chrome-extension/manifest.json
@@ -0,0 +1,17 @@
+{
+  "manifest_version": 3,
+  "name": "WhisperLiveKit Tab Capture",
+  "version": "0.1.0",
+  "description": "Capture and transcribe audio from browser tabs using WhisperLiveKit.",
+  "action": {
+    "default_title": "WhisperLiveKit Tab Capture",
+    "default_popup": "popup.html"
+  },
+  "permissions": ["scripting", "tabCapture", "offscreen", "activeTab", "storage"],
+  "web_accessible_resources": [
+    {
+      "resources": ["requestPermissions.html", "requestPermissions.js"],
+      "matches": ["<all_urls>"]
+    }
+  ]
+}
--- a/chrome-extension/popup.html
+++ b/chrome-extension/popup.html
@@ -0,0 +1,73 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>WhisperLiveKit</title>
+    <link rel="stylesheet" href="/web/live_transcription.css" />
+</head>
+
+<body>
+    <!-- Record button plus connection and theme settings -->
+    <div class="settings-container">
+        <button id="recordButton">
+            <div class="shape-container">
+                <div class="shape"></div>
+            </div>
+            <div class="recording-info">
+                <div class="wave-container">
+                    <canvas id="waveCanvas"></canvas>
+                </div>
+                <div class="timer">00:00</div>
+            </div>
+        </button>
+
+        <div class="settings">
+            <div class="field">
+                <label for="websocketInput">Websocket URL</label>
+                <input id="websocketInput" type="text" placeholder="ws://host:port/asr" />
+            </div>
+
+            <!-- <div class="field">
+                <label id="microphoneSelectLabel" for="microphoneSelect">Select Microphone</label>
+                <select id="microphoneSelect">
+                    <option value="">Default Microphone</option>
+                </select>
+            </div> -->
+
+            <div class="theme-selector-container">
+                <div class="segmented" role="radiogroup" aria-label="Theme selector">
+                    <input type="radio" id="theme-system" name="theme" value="system" />
+                    <label for="theme-system" title="System">
+                        <img src="/web/src/system_mode.svg" alt="" />
+                        <span>System</span>
+                    </label>
+
+                    <input type="radio" id="theme-light" name="theme" value="light" />
+                    <label for="theme-light" title="Light">
+                        <img src="/web/src/light_mode.svg" alt="" />
+                        <span>Light</span>
+                    </label>
+
+                    <input type="radio" id="theme-dark" name="theme" value="dark" />
+                    <label for="theme-dark" title="Dark">
+                        <img src="/web/src/dark_mode.svg" alt="" />
+                        <span>Dark</span>
+                    </label>
+                </div>
+            </div>
+
+        </div>
+    </div>
+
+    <!-- Status line and transcript output -->
+    <p id="status"></p>
+
+    <div id="linesTranscript"></div>
+
+    <!-- Loaded last so the elements above exist when the script runs -->
+    <script src="/web/live_transcription.js"></script>
+</body>
+
+</html>
--- a/chrome-extension/requestPermissions.html
+++ b/chrome-extension/requestPermissions.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <title>Request Permissions</title>
+    <script src="requestPermissions.js"></script>
+  </head>
+  <body>
+    <!-- Opened in a regular tab so the microphone permission prompt can be shown -->
+    This page exists to work around an issue with Chrome that blocks permission
+    requests from Chrome extensions.
+    <button id="requestMicrophone">Request Microphone</button>
+  </body>
+</html>
--- a/chrome-extension/requestPermissions.js
+++ b/chrome-extension/requestPermissions.js
@@ -0,0 +1,17 @@
+/**
+ * Requests user permission for microphone access.
+ * @returns {Promise<void>} A Promise that resolves when permission is granted or rejects with an error.
+ */
+async function getUserPermission() {
+  console.log("Getting user permission for microphone access...");
+  await navigator.mediaDevices.getUserMedia({ audio: true });
+  const micPermission = await navigator.permissions.query({
+    name: "microphone",
+  });
+  if (micPermission.state == "granted") {
+    window.close();
+  }
+}
+
+// Call the function to request microphone permission
+getUserPermission();
--- a/chrome-extension/service-worker.js
+++ b/chrome-extension/service-worker.js
@@ -0,0 +1,249 @@
+console.log("Service worker loaded");
+
+let isRecording = false;
+let currentStreamId = null;
+
+chrome.runtime.onInstalled.addListener((details) => {
+  console.log("Extension installed/updated");
+});
+
+chrome.action.onClicked.addListener((tab) => {
+  // Get the current tab ID
+  const tabId = tab.id;
+  
+  // Inject the content script into the current tab
+  chrome.scripting.executeScript({
+    target: { tabId: tabId },
+    files: ['style_popup.js']
+  });
+  
+  console.log(`Content script injected into tab ${tabId}`);
+}); 
+
+
+// Handle messages from popup
+chrome.runtime.onMessage.addListener(async (message, sender, sendResponse) => {
+  console.log("Service worker received message:", message);
+  
+  try {
+    switch (message.type) {
+      case 'start-capture':
+        const startResult = await startTabCapture(message.tabId, message.websocketUrl);
+        sendResponse(startResult);
+        break;
+        
+      case 'stop-capture':
+        const stopResult = await stopTabCapture();
+        sendResponse(stopResult);
+        break;
+        
+      case 'get-recording-state':
+        sendResponse({ isRecording: isRecording });
+        break;
+        
+      default:
+        sendResponse({ success: false, error: 'Unknown message type' });
+    }
+  } catch (error) {
+    console.error('Error handling message:', error);
+    sendResponse({ success: false, error: error.message });
+  }
+  
+  return true; // Keep message channel open for async response
+});
+
+async function startTabCapture(tabId, websocketUrl) {
+  console.log('Service worker: Starting tab capture process...');
+  console.log('Service worker: tabId:', tabId, 'websocketUrl:', websocketUrl);
+
+  try {
+    if (isRecording) {
+      console.log('Service worker: Already recording, aborting');
+      return { success: false, error: 'Already recording' };
+    }
+
+    // Check if offscreen document exists
+    console.log('Service worker: Checking for existing offscreen document...');
+    const existingContexts = await chrome.runtime.getContexts({});
+    console.log('Service worker: Found contexts:', existingContexts.length);
+
+    const offscreenDocument = existingContexts.find(
+      (c) => c.contextType === 'OFFSCREEN_DOCUMENT'
+    );
+
+    console.log('Service worker: Offscreen document exists:', !!offscreenDocument);
+
+    // Create offscreen document if it doesn't exist
+    if (!offscreenDocument) {
+      console.log('Service worker: Creating offscreen document...');
+      try {
+        await chrome.offscreen.createDocument({
+          url: 'offscreen.html',
+          reasons: ['USER_MEDIA'],
+          justification: 'Capturing and processing tab audio for transcription'
+        });
+        console.log('Service worker: Offscreen document created successfully');
+
+        // Wait for offscreen document to initialize
+        console.log('Service worker: Waiting for offscreen document to initialize...');
+        await new Promise(resolve => setTimeout(resolve, 500));
+        console.log('Service worker: Offscreen document initialization delay complete');
+
+      } catch (offscreenError) {
+        console.error('Service worker: Failed to create offscreen document:', offscreenError);
+        return { success: false, error: 'Failed to create offscreen document: ' + offscreenError.message };
+      }
+    }
+
+    // Get media stream ID for the tab
+    console.log('Service worker: Getting media stream ID for tab:', tabId);
+    try {
+      currentStreamId = await chrome.tabCapture.getMediaStreamId({
+        targetTabId: tabId
+      });
+      console.log('Service worker: Media stream ID:', currentStreamId);
+    } catch (tabCaptureError) {
+      console.error('Service worker: Failed to get media stream ID:', tabCaptureError);
+      return { success: false, error: 'Failed to get media stream ID: ' + tabCaptureError.message };
+    }
+
+    if (!currentStreamId) {
+      console.log('Service worker: No media stream ID returned');
+      return { success: false, error: 'Failed to get media stream ID - no stream returned' };
+    }
+
+    // Send message to offscreen document to start capture with retry logic
+    console.log('Service worker: Sending start message to offscreen document...');
+
+    let response;
+    let retryCount = 0;
+    const maxRetries = 5;
+
+    while (!response && retryCount < maxRetries) {
+      try {
+        console.log(`Service worker: Attempt ${retryCount + 1}/${maxRetries} to communicate with offscreen document`);
+
+        // Send message to offscreen document without target property
+        response = await chrome.runtime.sendMessage({
+          type: 'start-recording',
+          target: 'offscreen',
+          data: {
+            streamId: currentStreamId,
+            websocketUrl: websocketUrl
+          }
+        });
+
+        if (!response) {
+          console.warn(`Service worker: No response from offscreen document, waiting before retry...`);
+          await new Promise(resolve => setTimeout(resolve, 200));
+          retryCount++;
+        } else {
+          console.log(`Service worker: Successfully communicated with offscreen document on attempt ${retryCount + 1}`);
+        }
+      } catch (sendError) {
+        console.error(`Service worker: Error sending message to offscreen document (attempt ${retryCount + 1}):`, sendError);
+        response = { success: false, error: 'Failed to communicate with offscreen document: ' + sendError.message };
+        break;
+      }
+    }
+
+    console.log('Service worker: Final offscreen document response:', response);
+
+    if (response && response.success) {
+      isRecording = true;
+      console.log('Service worker: Recording started successfully');
+
+      // Notify popup of state change
+      try {
+        chrome.runtime.sendMessage({
+          type: 'recording-state',
+          isRecording: true
+        });
+      } catch (e) {
+        console.warn('Service worker: Could not notify popup of state change:', e);
+      }
+
+      return { success: true };
+    } else {
+      console.log('Service worker: Offscreen document returned failure');
+      return { success: false, error: response?.error || 'Failed to start recording in offscreen document' };
+    }
+
+  } catch (error) {
+    console.error('Service worker: Exception in startTabCapture:', error);
+    return { success: false, error: 'Exception: ' + error.message };
+  }
+}
+
+async function stopTabCapture() {
+  try {
+    if (!isRecording) {
+      return { success: false, error: 'Not currently recording' };
+    }
+    
+    // Send message to offscreen document to stop capture
+    const response = await chrome.runtime.sendMessage({
+      type: 'stop-recording',
+      target: 'offscreen'
+    });
+    
+    isRecording = false;
+    currentStreamId = null;
+    
+    // Notify popup of state change
+    try {
+      chrome.runtime.sendMessage({
+        type: 'recording-state',
+        isRecording: false
+      });
+    } catch (e) {
+      // Popup might be closed, ignore error
+    }
+    
+    return { success: true };
+    
+  } catch (error) {
+    console.error('Error stopping tab capture:', error);
+    isRecording = false;
+    currentStreamId = null;
+    return { success: false, error: error.message };
+  }
+}
+
+// Handle messages from offscreen document
+chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
+  if (message.target === 'service-worker') {
+    switch (message.type) {
+      case 'recording-stopped':
+        isRecording = false;
+        currentStreamId = null;
+        
+        // Notify popup
+        try {
+          chrome.runtime.sendMessage({
+            type: 'recording-state',
+            isRecording: false
+          });
+        } catch (e) {
+          // Popup might be closed, ignore error
+        }
+        break;
+        
+      case 'recording-error':
+        isRecording = false;
+        currentStreamId = null;
+        
+        // Notify popup
+        try {
+          chrome.runtime.sendMessage({
+            type: 'status-update',
+            status: 'error',
+            message: message.error || 'Recording error occurred'
+          });
+        } catch (e) {
+          // Popup might be closed, ignore error
+        }
+        break;
+    }
+  }
+});
--- a/chrome-extension/sidepanel.js
+++ b/chrome-extension/sidepanel.js
@@ -0,0 +1,29 @@
+console.log("sidepanel.js");
+
+async function run() {
+  const micPermission = await navigator.permissions.query({
+    name: "microphone",
+  });
+
+  document.getElementById(
+    "audioPermission"
+  ).innerText = `MICROPHONE: ${micPermission.state}`;
+
+  if (micPermission.state !== "granted") {
+    chrome.tabs.create({ url: "requestPermissions.html" });
+  }
+
+  const intervalId = setInterval(async () => {
+    const micPermission = await navigator.permissions.query({
+      name: "microphone",
+    });
+    if (micPermission.state === "granted") {
+      document.getElementById(
+        "audioPermission"
+      ).innerText = `MICROPHONE: ${micPermission.state}`;
+      clearInterval(intervalId);
+    }
+  }, 100);
+}
+
+void run();
--- a/chrome-extension/web/live_transcription.css
+++ b/chrome-extension/web/live_transcription.css
@@ -0,0 +1,469 @@
+:root {
+  --bg: #ffffff;
+  --text: #111111;
+  --muted: #666666;
+  --border: #e5e5e5;
+  --chip-bg: rgba(0, 0, 0, 0.04);
+  --chip-text: #000000;
+  --spinner-border: #8d8d8d5c;
+  --spinner-top: #b0b0b0;
+  --silence-bg: #f3f3f3;
+  --loading-bg: rgba(255, 77, 77, 0.06);
+  --button-bg: #ffffff;
+  --button-border: #e9e9e9;
+  --wave-stroke: #000000;
+  --label-dia-text: #868686;
+  --label-trans-text: #111111;
+}
+
+@media (prefers-color-scheme: dark) {
+  :root:not([data-theme="light"]) {
+    --bg: #0b0b0b;
+    --text: #e6e6e6;
+    --muted: #9aa0a6;
+    --border: #333333;
+    --chip-bg: rgba(255, 255, 255, 0.08);
+    --chip-text: #e6e6e6;
+    --spinner-border: #555555;
+    --spinner-top: #dddddd;
+    --silence-bg: #1a1a1a;
+    --loading-bg: rgba(255, 77, 77, 0.12);
+    --button-bg: #111111;
+    --button-border: #333333;
+    --wave-stroke: #e6e6e6;
+    --label-dia-text: #b3b3b3;
+    --label-trans-text: #ffffff;
+  }
+}
+
+:root[data-theme="dark"] {
+  --bg: #0b0b0b;
+  --text: #e6e6e6;
+  --muted: #9aa0a6;
+  --border: #333333;
+  --chip-bg: rgba(255, 255, 255, 0.08);
+  --chip-text: #e6e6e6;
+  --spinner-border: #555555;
+  --spinner-top: #dddddd;
+  --silence-bg: #1a1a1a;
+  --loading-bg: rgba(255, 77, 77, 0.12);
+  --button-bg: #111111;
+  --button-border: #333333;
+  --wave-stroke: #e6e6e6;
+  --label-dia-text: #b3b3b3;
+  --label-trans-text: #ffffff;
+}
+
+:root[data-theme="light"] {
+  --bg: #ffffff;
+  --text: #111111;
+  --muted: #666666;
+  --border: #e5e5e5;
+  --chip-bg: rgba(0, 0, 0, 0.04);
+  --chip-text: #000000;
+  --spinner-border: #8d8d8d5c;
+  --spinner-top: #b0b0b0;
+  --silence-bg: #f3f3f3;
+  --loading-bg: rgba(255, 77, 77, 0.06);
+  --button-bg: #ffffff;
+  --button-border: #e9e9e9;
+  --wave-stroke: #000000;
+  --label-dia-text: #868686;
+  --label-trans-text: #111111;
+}
+
+body {
+  font-family: ui-sans-serif, system-ui, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol', 'Noto Color Emoji';
+  margin: 20px;
+  text-align: center;
+  background-color: var(--bg);
+  color: var(--text);
+}
+
+/* Record button */
+#recordButton {
+  width: 50px;
+  height: 50px;
+  border-radius: 50%;
+  background-color: var(--button-bg);
+  cursor: pointer;
+  transition: all 0.3s ease;
+  /* single border declaration: an earlier `border: none` was always overridden by this one */
+  border: 1px solid var(--button-border);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  position: relative;
+}
+
+#recordButton.recording {
+  width: 180px;
+  border-radius: 40px;
+  justify-content: flex-start;
+  padding-left: 20px;
+}
+
+#recordButton:active {
+  transform: scale(0.95);
+}
+
+.shape-container {
+  width: 25px;
+  height: 25px;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  flex-shrink: 0;
+}
+
+.shape {
+  width: 25px;
+  height: 25px;
+  background-color: rgb(209, 61, 53);
+  border-radius: 50%;
+  transition: all 0.3s ease;
+}
+
+#recordButton:disabled .shape {
+  background-color: #6e6d6d;
+}
+
+#recordButton.recording .shape {
+  border-radius: 5px;
+  width: 25px;
+  height: 25px;
+}
+
+/* Recording elements */
+.recording-info {
+  display: none;
+  align-items: center;
+  margin-left: 15px;
+  flex-grow: 1;
+}
+
+#recordButton.recording .recording-info {
+  display: flex;
+}
+
+.wave-container {
+  width: 60px;
+  height: 30px;
+  position: relative;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+}
+
+#waveCanvas {
+  width: 100%;
+  height: 100%;
+}
+
+.timer {
+  font-size: 14px;
+  font-weight: 500;
+  color: var(--text);
+  margin-left: 10px;
+}
+
+#status {
+  margin-top: 20px;
+  font-size: 16px;
+  color: var(--text);
+}
+
+/* Settings */
+.settings-container {
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  gap: 15px;
+  margin-top: 20px;
+}
+
+.settings {
+  display: flex;
+  flex-wrap: wrap;
+  align-items: flex-start;
+  gap: 12px;
+}
+
+.field {
+  display: flex;
+  flex-direction: column;
+  align-items: flex-start;
+  gap: 3px;
+}
+
+#chunkSelector,
+#websocketInput,
+#themeSelector,
+#microphoneSelect {
+  font-size: 16px;
+  padding: 5px 8px;
+  border-radius: 8px;
+  border: 1px solid var(--border);
+  background-color: var(--button-bg);
+  color: var(--text);
+  max-height: 30px;
+}
+
+#microphoneSelect {
+  width: 100%;
+  max-width: 190px;
+  min-width: 120px;
+}
+
+#chunkSelector:focus,
+#websocketInput:focus,
+#themeSelector:focus,
+#microphoneSelect:focus {
+  outline: none;
+  border-color: #007bff;
+  box-shadow: 0 0 0 3px rgba(0, 123, 255, 0.15);
+}
+
+label {
+  font-size: 13px;
+  color: var(--muted);
+}
+
+.ws-default {
+  font-size: 12px;
+  color: var(--muted);
+}
+
+/* Segmented pill control for Theme */
+.segmented {
+  display: inline-flex;
+  align-items: stretch;
+  border: 1px solid var(--button-border);
+  background-color: var(--button-bg);
+  border-radius: 999px;
+  overflow: hidden;
+}
+
+.segmented input[type="radio"] {
+  position: absolute;
+  opacity: 0;
+  pointer-events: none;
+}
+
+.theme-selector-container {
+  display: flex;
+  align-items: center;
+  margin-top: 17px;
+}
+
+.segmented label {
+  display: inline-flex;
+  align-items: center;
+  gap: 6px;
+  padding: 6px 12px;
+  font-size: 14px;
+  color: var(--muted);
+  cursor: pointer;
+  user-select: none;
+  transition: background-color 0.2s ease, color 0.2s ease;
+}
+
+.segmented label span {
+  display: none;
+}
+
+.segmented label:hover span {
+  display: inline;
+}
+
+.segmented label:hover {
+  background-color: var(--chip-bg);
+}
+
+.segmented img {
+  width: 16px;
+  height: 16px;
+}
+
+.segmented input[type="radio"]:checked + label {
+  background-color: var(--chip-bg);
+  color: var(--text);
+}
+
+.segmented input[type="radio"]:focus-visible + label,
+.segmented input[type="radio"]:focus + label {
+  outline: 2px solid #007bff;
+  outline-offset: 2px;
+  border-radius: 999px;
+}
+
+/* Transcript area */
+#linesTranscript {
+  margin: 20px auto;
+  max-width: 700px;
+  text-align: left;
+  font-size: 16px;
+}
+
+#linesTranscript p {
+  margin: 0px 0;
+}
+
+#linesTranscript strong {
+  color: var(--text);
+}
+
+#speaker {
+  border: 1px solid var(--border);
+  border-radius: 100px;
+  padding: 2px 10px;
+  font-size: 14px;
+  margin-bottom: 0px;
+}
+
+.label_diarization {
+  background-color: var(--chip-bg);
+  border-radius: 8px 8px 8px 8px;
+  padding: 2px 10px;
+  margin-left: 10px;
+  display: inline-block;
+  white-space: nowrap;
+  font-size: 14px;
+  margin-bottom: 0px;
+  color: var(--label-dia-text);
+}
+
+.label_transcription {
+  background-color: var(--chip-bg);
+  border-radius: 8px 8px 8px 8px;
+  padding: 2px 10px;
+  display: inline-block;
+  white-space: nowrap;
+  margin-left: 10px;
+  font-size: 14px;
+  margin-bottom: 0px;
+  color: var(--label-trans-text);
+}
+
+#timeInfo {
+  color: var(--muted);
+  margin-left: 10px;
+}
+
+.textcontent {
+  font-size: 16px;
+  padding-left: 10px;
+  margin-bottom: 10px;
+  margin-top: 1px;
+  padding-top: 5px;
+  border-radius: 0px 0px 0px 10px;
+}
+
+.buffer_diarization {
+  color: var(--label-dia-text);
+  margin-left: 4px;
+}
+
+.buffer_transcription {
+  color: #7474748c;
+  margin-left: 4px;
+}
+
+.spinner {
+  display: inline-block;
+  width: 8px;
+  height: 8px;
+  border: 2px solid var(--spinner-border);
+  border-top: 2px solid var(--spinner-top);
+  border-radius: 50%;
+  animation: spin 0.7s linear infinite;
+  vertical-align: middle;
+  margin-bottom: 2px;
+  margin-right: 5px;
+}
+
+@keyframes spin {
+  to {
+    transform: rotate(360deg);
+  }
+}
+
+.silence {
+  color: var(--muted);
+  background-color: var(--silence-bg);
+  font-size: 13px;
+  border-radius: 30px;
+  padding: 2px 10px;
+}
+
+.loading {
+  color: var(--muted);
+  background-color: var(--loading-bg);
+  border-radius: 8px 8px 8px 0px;
+  padding: 2px 10px;
+  font-size: 14px;
+  margin-bottom: 0px;
+}
+
+/* for smaller screens */
+@media (max-width: 768px) {
+  .settings-container {
+    flex-direction: column;
+    gap: 10px;
+  }
+  
+  .settings {
+    justify-content: center;
+    gap: 8px;
+  }
+  
+  .field {
+    align-items: center;
+  }
+  
+  #websocketInput,
+  #microphoneSelect {
+    min-width: 200px;
+    max-width: 400px;
+  }
+  
+  .theme-selector-container {
+    margin-top: 10px;
+  }
+}
+
+@media (max-width: 480px) {
+  body {
+    margin: 10px;
+  }
+  
+  .settings {
+    flex-direction: column;
+    align-items: center;
+    gap: 6px;
+  }
+  
+  #websocketInput,
+  #microphoneSelect {
+    max-width: 400px;
+  }
+  
+  .segmented label {
+    padding: 4px 8px;
+    font-size: 12px;
+  }
+  
+  .segmented img {
+    width: 14px;
+    height: 14px;
+  }
+}
+
+
+/* Fixed size of the popup document */
+html {
+  width: 400px; /* max: 800px */
+  height: 600px; /* max: 600px */
+  border-radius: 10px;
+}
--- a/chrome-extension/web/live_transcription.js
+++ b/chrome-extension/web/live_transcription.js
@@ -0,0 +1,619 @@
+/* Theme, WebSocket, recording, rendering logic extracted from inline script and adapted for segmented theme control and WS caption */
+// ---- Mutable session state shared by the functions below ----
+let isRecording = false;
+let websocket = null;
+let recorder = null;
+// Timeslice passed to recorder.start(); updated by the optional chunk selector
+let chunkDuration = 100;
+let websocketUrl = "ws://localhost:8000/asr";
+// Set by stopRecording(); read in websocket.onclose to tell a requested close from a disconnect
+let userClosing = false;
+let wakeLock = null;
+// Recording start timestamp (Date.now()) and the interval handle that drives updateTimer()
+let startTime = null;
+let timerInterval = null;
+// Web Audio objects used to feed the waveform display
+let audioContext = null;
+let analyser = null;
+let microphone = null;
+let waveCanvas = document.getElementById("waveCanvas");
+let waveCtx = waveCanvas.getContext("2d");
+let animationFrame = null;
+// True between sending the stop signal and the socket's "ready_to_stop" / close
+let waitingForStop = false;
+// Last non-control message received from the server (re-rendered when finalizing)
+let lastReceivedData = null;
+// JSON signature of the last rendered transcript, used to skip identical re-renders
+let lastSignature = null;
+let availableMicrophones = [];
+let selectedMicrophoneId = null;
+
+// Size the canvas backing store by the device pixel ratio and scale the context
+// to match, so drawing code can work in 60x30 CSS-pixel coordinates.
+waveCanvas.width = 60 * (window.devicePixelRatio || 1);
+waveCanvas.height = 30 * (window.devicePixelRatio || 1);
+waveCtx.scale(window.devicePixelRatio || 1, window.devicePixelRatio || 1);
+
+// ---- DOM references ----
+const statusText = document.getElementById("status");
+const recordButton = document.getElementById("recordButton");
+const chunkSelector = document.getElementById("chunkSelector");
+const websocketInput = document.getElementById("websocketInput");
+const websocketDefaultSpan = document.getElementById("wsDefaultUrl");
+const linesTranscriptDiv = document.getElementById("linesTranscript");
+const timerElement = document.querySelector(".timer");
+const themeRadios = document.querySelectorAll('input[name="theme"]');
+const microphoneSelect = document.getElementById("microphoneSelect");
+
+function getWaveStroke() {
+  const styles = getComputedStyle(document.documentElement);
+  const v = styles.getPropertyValue("--wave-stroke").trim();
+  return v || "#000";
+}
+
+// Cached stroke colour used by drawWaveform()
+let waveStroke = getWaveStroke();
+/** Re-read the stroke colour from CSS and refresh the cached `waveStroke`. */
+function updateWaveStroke() {
+  waveStroke = getWaveStroke();
+}
+
+function applyTheme(pref) {
+  if (pref === "light") {
+    document.documentElement.setAttribute("data-theme", "light");
+  } else if (pref === "dark") {
+    document.documentElement.setAttribute("data-theme", "dark");
+  } else {
+    document.documentElement.removeAttribute("data-theme");
+  }
+  updateWaveStroke();
+}
+
+// Persisted theme preference
+const savedThemePref = localStorage.getItem("themePreference") || "system";
+applyTheme(savedThemePref);
+if (themeRadios.length) {
+  themeRadios.forEach((r) => {
+    r.checked = r.value === savedThemePref;
+    r.addEventListener("change", () => {
+      if (r.checked) {
+        localStorage.setItem("themePreference", r.value);
+        applyTheme(r.value);
+      }
+    });
+  });
+}
+
+// React to OS theme changes when in "system" mode
+// window.matchMedia is checked first, so darkMq is falsy where the API is unavailable.
+const darkMq = window.matchMedia && window.matchMedia("(prefers-color-scheme: dark)");
+// Only the cached waveform colour is refreshed here, and only when the stored
+// preference is "system" (or nothing is stored).
+const handleOsThemeChange = () => {
+  const pref = localStorage.getItem("themePreference") || "system";
+  if (pref === "system") updateWaveStroke();
+};
+if (darkMq && darkMq.addEventListener) {
+  darkMq.addEventListener("change", handleOsThemeChange);
+} else if (darkMq && darkMq.addListener) {
+  // deprecated, but included for Safari compatibility
+  darkMq.addListener(handleOsThemeChange);
+}
+
+async function enumerateMicrophones() {
+  try {
+      const micPermission = await navigator.permissions.query({
+    name: "microphone",
+  });
+  
+    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    stream.getTracks().forEach(track => track.stop());
+
+    const devices = await navigator.mediaDevices.enumerateDevices();
+    availableMicrophones = devices.filter(device => device.kind === 'audioinput');
+
+    populateMicrophoneSelect();
+    console.log(`Found ${availableMicrophones.length} microphone(s)`);
+  } catch (error) {
+    console.error('Error enumerating microphones:', error);
+    statusText.textContent = "Error accessing microphones. Please grant permission.";
+  }
+}
+
+function populateMicrophoneSelect() {
+  if (!microphoneSelect) return;
+
+  microphoneSelect.innerHTML = '<option value="">Default Microphone</option>';
+
+  availableMicrophones.forEach((device, index) => {
+    const option = document.createElement('option');
+    option.value = device.deviceId;
+    option.textContent = device.label || `Microphone ${index + 1}`;
+    microphoneSelect.appendChild(option);
+  });
+
+  const savedMicId = localStorage.getItem('selectedMicrophone');
+  if (savedMicId && availableMicrophones.some(mic => mic.deviceId === savedMicId)) {
+    microphoneSelect.value = savedMicId;
+    selectedMicrophoneId = savedMicId;
+  }
+}
+
+function handleMicrophoneChange() {
+  selectedMicrophoneId = microphoneSelect.value || null;
+  localStorage.setItem('selectedMicrophone', selectedMicrophoneId || '');
+
+  const selectedDevice = availableMicrophones.find(mic => mic.deviceId === selectedMicrophoneId);
+  const deviceName = selectedDevice ? selectedDevice.label : 'Default Microphone';
+
+  console.log(`Selected microphone: ${deviceName}`);
+  statusText.textContent = `Microphone changed to: ${deviceName}`;
+
+  if (isRecording) {
+    statusText.textContent = "Switching microphone... Please wait.";
+    stopRecording().then(() => {
+      setTimeout(() => {
+        toggleRecording();
+      }, 1000);
+    });
+  }
+}
+
+// Helpers
+function fmt1(x) {
+  const n = Number(x);
+  return Number.isFinite(n) ? n.toFixed(1) : x;
+}
+
+// Default WebSocket URL computation
+// NOTE(review): host, port and protocol are computed from window.location but are
+// not referenced anywhere in the visible part of this script; the default URL is
+// simply the initial value of websocketUrl.
+const host = window.location.hostname || "localhost";
+const port = window.location.port;
+const protocol = window.location.protocol === "https:" ? "wss" : "ws";
+const defaultWebSocketUrl = websocketUrl;
+
+// Populate default caption and input
+if (websocketDefaultSpan) websocketDefaultSpan.textContent = defaultWebSocketUrl;
+websocketInput.value = defaultWebSocketUrl;
+websocketUrl = defaultWebSocketUrl;
+
+// Optional chunk selector (guard for presence)
+if (chunkSelector) {
+  chunkSelector.addEventListener("change", () => {
+    chunkDuration = parseInt(chunkSelector.value);
+  });
+}
+
+// WebSocket input change handling
+websocketInput.addEventListener("change", () => {
+  const urlValue = websocketInput.value.trim();
+  if (!urlValue.startsWith("ws://") && !urlValue.startsWith("wss://")) {
+    statusText.textContent = "Invalid WebSocket URL (must start with ws:// or wss://)";
+    return;
+  }
+  websocketUrl = urlValue;
+  statusText.textContent = "WebSocket URL updated. Ready to connect.";
+});
+
+/**
+ * Open a WebSocket to `websocketUrl` and install its event handlers.
+ *
+ * @returns {Promise<void>} Resolves when the socket opens. Rejects when the
+ *   WebSocket constructor throws or when an error event fires.
+ */
+function setupWebSocket() {
+  return new Promise((resolve, reject) => {
+    try {
+      websocket = new WebSocket(websocketUrl);
+    } catch (error) {
+      statusText.textContent = "Invalid WebSocket URL. Please check and try again.";
+      reject(error);
+      return;
+    }
+
+    websocket.onopen = () => {
+      statusText.textContent = "Connected to server.";
+      resolve();
+    };
+
+    websocket.onclose = () => {
+      if (userClosing) {
+        // Close requested via stopRecording(). If "ready_to_stop" has not cleared
+        // waitingForStop yet, render the last received data as final here.
+        if (waitingForStop) {
+          statusText.textContent = "Processing finalized or connection closed.";
+          if (lastReceivedData) {
+            renderLinesWithBuffer(
+              lastReceivedData.lines || [],
+              lastReceivedData.buffer_diarization || "",
+              lastReceivedData.buffer_transcription || "",
+              0,
+              0,
+              true
+            );
+          }
+        }
+      } else {
+        // Close not requested by the user
+        statusText.textContent = "Disconnected from the WebSocket server. (Check logs if model is loading.)";
+        if (isRecording) {
+          // stopRecording() is async and is not awaited; the resets below run
+          // as soon as it first yields.
+          stopRecording();
+        }
+      }
+      // Reset session state regardless of how the socket was closed
+      isRecording = false;
+      waitingForStop = false;
+      userClosing = false;
+      lastReceivedData = null;
+      websocket = null;
+      updateUI();
+    };
+
+    websocket.onerror = () => {
+      statusText.textContent = "Error connecting to WebSocket.";
+      reject(new Error("Error connecting to WebSocket"));
+    };
+
+    websocket.onmessage = (event) => {
+      // Assumes every message is JSON text; a parse failure throws inside this handler.
+      const data = JSON.parse(event.data);
+
+      // Control message: render the last data as final and close the socket
+      if (data.type === "ready_to_stop") {
+        console.log("Ready to stop received, finalizing display and closing WebSocket.");
+        waitingForStop = false;
+
+        if (lastReceivedData) {
+          renderLinesWithBuffer(
+            lastReceivedData.lines || [],
+            lastReceivedData.buffer_diarization || "",
+            lastReceivedData.buffer_transcription || "",
+            0,
+            0,
+            true
+          );
+        }
+        statusText.textContent = "Finished processing audio! Ready to record again.";
+        recordButton.disabled = false;
+
+        if (websocket) {
+          websocket.close();
+        }
+        return;
+      }
+
+      // Any other message is kept so it can be re-rendered when finalizing
+      lastReceivedData = data;
+
+      // Missing fields fall back to empty / zero defaults
+      const {
+        lines = [],
+        buffer_transcription = "",
+        buffer_diarization = "",
+        remaining_time_transcription = 0,
+        remaining_time_diarization = 0,
+        status = "active_transcription",
+      } = data;
+
+      renderLinesWithBuffer(
+        lines,
+        buffer_diarization,
+        buffer_transcription,
+        remaining_time_diarization,
+        remaining_time_transcription,
+        false,
+        status
+      );
+    };
+  });
+}
+
+function renderLinesWithBuffer(
+  lines,
+  buffer_diarization,
+  buffer_transcription,
+  remaining_time_diarization,
+  remaining_time_transcription,
+  isFinalizing = false,
+  current_status = "active_transcription"
+) {
+  if (current_status === "no_audio_detected") {
+    linesTranscriptDiv.innerHTML =
+      "<p style='text-align: center; color: var(--muted); margin-top: 20px;'><em>No audio detected...</em></p>";
+    return;
+  }
+
+  const showLoading = !isFinalizing && (lines || []).some((it) => it.speaker == 0);
+  const showTransLag = !isFinalizing && remaining_time_transcription > 0;
+  const showDiaLag = !isFinalizing && !!buffer_diarization && remaining_time_diarization > 0;
+  const signature = JSON.stringify({
+    lines: (lines || []).map((it) => ({ speaker: it.speaker, text: it.text, beg: it.beg, end: it.end })),
+    buffer_transcription: buffer_transcription || "",
+    buffer_diarization: buffer_diarization || "",
+    status: current_status,
+    showLoading,
+    showTransLag,
+    showDiaLag,
+    isFinalizing: !!isFinalizing,
+  });
+  if (lastSignature === signature) {
+    const t = document.querySelector(".lag-transcription-value");
+    if (t) t.textContent = fmt1(remaining_time_transcription);
+    const d = document.querySelector(".lag-diarization-value");
+    if (d) d.textContent = fmt1(remaining_time_diarization);
+    const ld = document.querySelector(".loading-diarization-value");
+    if (ld) ld.textContent = fmt1(remaining_time_diarization);
+    return;
+  }
+  lastSignature = signature;
+
+  const linesHtml = (lines || [])
+    .map((item, idx) => {
+      let timeInfo = "";
+      if (item.beg !== undefined && item.end !== undefined) {
+        timeInfo = ` ${item.beg} - ${item.end}`;
+      }
+
+      let speakerLabel = "";
+      if (item.speaker === -2) {
+        speakerLabel = `<span class="silence">Silence<span id='timeInfo'>${timeInfo}</span></span>`;
+      } else if (item.speaker == 0 && !isFinalizing) {
+        speakerLabel = `<span class='loading'><span class="spinner"></span><span id='timeInfo'><span class="loading-diarization-value">${fmt1(
+          remaining_time_diarization
+        )}</span> second(s) of audio are undergoing diarization</span></span>`;
+      } else if (item.speaker !== 0) {
+        speakerLabel = `<span id="speaker">Speaker ${item.speaker}<span id='timeInfo'>${timeInfo}</span></span>`;
+      }
+
+      let currentLineText = item.text || "";
+
+      if (idx === lines.length - 1) {
+        if (!isFinalizing && item.speaker !== -2) {
+          if (remaining_time_transcription > 0) {
+            speakerLabel += `<span class="label_transcription"><span class="spinner"></span>Lag <span id='timeInfo'><span class="lag-transcription-value">${fmt1(
+              remaining_time_transcription
+            )}</span>s</span></span>`;
+          }
+          if (buffer_diarization && remaining_time_diarization > 0) {
+            speakerLabel += `<span class="label_diarization"><span class="spinner"></span>Lag<span id='timeInfo'><span class="lag-diarization-value">${fmt1(
+              remaining_time_diarization
+            )}</span>s</span></span>`;
+          }
+        }
+
+        if (buffer_diarization) {
+          if (isFinalizing) {
+            currentLineText +=
+              (currentLineText.length > 0 && buffer_diarization.trim().length > 0 ? " " : "") + buffer_diarization.trim();
+          } else {
+            currentLineText += `<span class="buffer_diarization">${buffer_diarization}</span>`;
+          }
+        }
+        if (buffer_transcription) {
+          if (isFinalizing) {
+            currentLineText +=
+              (currentLineText.length > 0 && buffer_transcription.trim().length > 0 ? " " : "") +
+              buffer_transcription.trim();
+          } else {
+            currentLineText += `<span class="buffer_transcription">${buffer_transcription}</span>`;
+          }
+        }
+      }
+
+      return currentLineText.trim().length > 0 || speakerLabel.length > 0
+        ? `<p>${speakerLabel}<br/><div class='textcontent'>${currentLineText}</div></p>`
+        : `<p>${speakerLabel}<br/></p>`;
+    })
+    .join("");
+
+  linesTranscriptDiv.innerHTML = linesHtml;
+  window.scrollTo({ top: document.body.scrollHeight, behavior: "smooth" });
+}
+
+function updateTimer() {
+  if (!startTime) return;
+
+  const elapsed = Math.floor((Date.now() - startTime) / 1000);
+  const minutes = Math.floor(elapsed / 60).toString().padStart(2, "0");
+  const seconds = (elapsed % 60).toString().padStart(2, "0");
+  timerElement.textContent = `${minutes}:${seconds}`;
+}
+
+function drawWaveform() {
+  if (!analyser) return;
+
+  const bufferLength = analyser.frequencyBinCount;
+  const dataArray = new Uint8Array(bufferLength);
+  analyser.getByteTimeDomainData(dataArray);
+
+  waveCtx.clearRect(
+    0,
+    0,
+    waveCanvas.width / (window.devicePixelRatio || 1),
+    waveCanvas.height / (window.devicePixelRatio || 1)
+  );
+  waveCtx.lineWidth = 1;
+  waveCtx.strokeStyle = waveStroke;
+  waveCtx.beginPath();
+
+  const sliceWidth = (waveCanvas.width / (window.devicePixelRatio || 1)) / bufferLength;
+  let x = 0;
+
+  for (let i = 0; i < bufferLength; i++) {
+    const v = dataArray[i] / 128.0;
+    const y = (v * (waveCanvas.height / (window.devicePixelRatio || 1))) / 2;
+
+    if (i === 0) {
+      waveCtx.moveTo(x, y);
+    } else {
+      waveCtx.lineTo(x, y);
+    }
+
+    x += sliceWidth;
+  }
+
+  waveCtx.lineTo(
+    waveCanvas.width / (window.devicePixelRatio || 1),
+    (waveCanvas.height / (window.devicePixelRatio || 1)) / 2
+  );
+  waveCtx.stroke();
+
+  animationFrame = requestAnimationFrame(drawWaveform);
+}
+
+/**
+ * Start capturing audio and streaming it to the open WebSocket.
+ *
+ * Tries tab audio capture first and falls back to the (optionally selected)
+ * microphone. Sets up the analyser for the waveform, starts a MediaRecorder
+ * that sends each chunk over the socket, and starts the timer.
+ *
+ * Errors are reported in the status text and logged; they are not rethrown, so
+ * awaiting this function never rejects.
+ * @returns {Promise<void>}
+ */
+async function startRecording() {
+  try {
+    // Wake lock is best effort: a failure is logged and recording continues
+    try {
+      wakeLock = await navigator.wakeLock.request("screen");
+    } catch (err) {
+      console.log("Error acquiring wake lock.");
+    }
+
+    let stream;
+    try {
+      // Try tab capture first
+      stream = await new Promise((resolve, reject) => {
+        chrome.tabCapture.capture({audio: true}, (s) => {
+          if (s) {
+            resolve(s);
+          } else {
+            reject(new Error('Tab capture failed or not available'));
+          }
+        });
+      });
+      statusText.textContent = "Using tab audio capture.";
+    } catch (tabError) {
+      console.log('Tab capture not available, falling back to microphone', tabError);
+      // Fallback to microphone
+      const audioConstraints = selectedMicrophoneId
+        ? { audio: { deviceId: { exact: selectedMicrophoneId } } }
+        : { audio: true };
+      stream = await navigator.mediaDevices.getUserMedia(audioConstraints);
+      statusText.textContent = "Using microphone audio.";
+    }
+
+    // Analyser graph used only for the waveform display (source -> analyser).
+    // NOTE(review): the source is not connected to audioContext.destination;
+    // confirm whether captured tab audio should still be played back to the user.
+    audioContext = new (window.AudioContext || window.webkitAudioContext)();
+    analyser = audioContext.createAnalyser();
+    analyser.fftSize = 256;
+    microphone = audioContext.createMediaStreamSource(stream);
+    microphone.connect(analyser);
+
+    // Each recorded chunk is forwarded as-is while the socket is open;
+    // chunks produced while it is not open are dropped.
+    recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
+    recorder.ondataavailable = (e) => {
+      if (websocket && websocket.readyState === WebSocket.OPEN) {
+        websocket.send(e.data);
+      }
+    };
+    recorder.start(chunkDuration);
+
+    startTime = Date.now();
+    timerInterval = setInterval(updateTimer, 1000);
+    drawWaveform();
+
+    isRecording = true;
+    updateUI();
+  } catch (err) {
+    if (window.location.hostname === "0.0.0.0") {
+      statusText.textContent =
+        "Error accessing audio input. Browsers may block audio access on 0.0.0.0. Try using localhost:8000 instead.";
+    } else {
+      statusText.textContent = "Error accessing audio input. Please check permissions.";
+    }
+    console.error(err);
+  }
+}
+
+async function stopRecording() {
+  if (wakeLock) {
+    try {
+      await wakeLock.release();
+    } catch (e) {
+      // ignore
+    }
+    wakeLock = null;
+  }
+
+  userClosing = true;
+  waitingForStop = true;
+
+  if (websocket && websocket.readyState === WebSocket.OPEN) {
+    const emptyBlob = new Blob([], { type: "audio/webm" });
+    websocket.send(emptyBlob);
+    statusText.textContent = "Recording stopped. Processing final audio...";
+  }
+
+  if (recorder) {
+    recorder.stop();
+    recorder = null;
+  }
+
+  if (microphone) {
+    microphone.disconnect();
+    microphone = null;
+  }
+
+  if (analyser) {
+    analyser = null;
+  }
+
+  if (audioContext && audioContext.state !== "closed") {
+    try {
+      await audioContext.close();
+    } catch (e) {
+      console.warn("Could not close audio context:", e);
+    }
+    audioContext = null;
+  }
+
+  if (animationFrame) {
+    cancelAnimationFrame(animationFrame);
+    animationFrame = null;
+  }
+
+  if (timerInterval) {
+    clearInterval(timerInterval);
+    timerInterval = null;
+  }
+  timerElement.textContent = "00:00";
+  startTime = null;
+
+  isRecording = false;
+  updateUI();
+}
+
+async function toggleRecording() {
+  if (!isRecording) {
+    if (waitingForStop) {
+      console.log("Waiting for stop, early return");
+      return;
+    }
+    console.log("Connecting to WebSocket");
+    try {
+      if (websocket && websocket.readyState === WebSocket.OPEN) {
+        await startRecording();
+      } else {
+        await setupWebSocket();
+        await startRecording();
+      }
+    } catch (err) {
+      statusText.textContent = "Could not connect to WebSocket or access mic. Aborted.";
+      console.error(err);
+    }
+  } else {
+    console.log("Stopping recording");
+    stopRecording();
+  }
+}
+
+function updateUI() {
+  recordButton.classList.toggle("recording", isRecording);
+  recordButton.disabled = waitingForStop;
+
+  if (waitingForStop) {
+    if (statusText.textContent !== "Recording stopped. Processing final audio...") {
+      statusText.textContent = "Please wait for processing to complete...";
+    }
+  } else if (isRecording) {
+    statusText.textContent = "Recording...";
+  } else {
+    if (
+      statusText.textContent !== "Finished processing audio! Ready to record again." &&
+      statusText.textContent !== "Processing finalized or connection closed."
+    ) {
+      statusText.textContent = "Click to start transcription";
+    }
+  }
+  if (!waitingForStop) {
+    recordButton.disabled = false;
+  }
+}
+
+// Wire up the record button; the microphone select is optional in the page
+recordButton.addEventListener("click", toggleRecording);
+
+if (microphoneSelect) {
+  microphoneSelect.addEventListener("change", handleMicrophoneChange);
+}
+// document.addEventListener('DOMContentLoaded', async () => {
+//   try {
+//     await enumerateMicrophones();
+//   } catch (error) {
+//     console.log("Could not enumerate microphones on load:", error);
+//   }
+// });
+// navigator.mediaDevices.addEventListener('devicechange', async () => {
+//   console.log('Device change detected, re-enumerating microphones');
+//   try {
+//     await enumerateMicrophones();
+//   } catch (error) {
+//     console.log("Error re-enumerating microphones:", error);
+//   }
+// });
--- a/chrome-extension/web/src/dark_mode.svg
+++ b/chrome-extension/web/src/dark_mode.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#5f6368"><path d="M480-120q-151 0-255.5-104.5T120-480q0-138 90-239.5T440-838q13-2 23 3.5t16 14.5q6 9 6.5 21t-7.5 23q-17 26-25.5 55t-8.5 61q0 90 63 153t153 63q31 0 61.5-9t54.5-25q11-7 22.5-6.5T819-479q10 5 15.5 15t3.5 24q-14 138-117.5 229T480-120Zm0-80q88 0 158-48.5T740-375q-20 5-40 8t-40 3q-123 0-209.5-86.5T364-660q0-20 3-40t8-40q-78 32-126.5 102T200-480q0 116 82 198t198 82Zm-10-270Z"/></svg>
--- a/chrome-extension/web/src/light_mode.svg
+++ b/chrome-extension/web/src/light_mode.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#5f6368"><path d="M480-360q50 0 85-35t35-85q0-50-35-85t-85-35q-50 0-85 35t-35 85q0 50 35 85t85 35Zm0 80q-83 0-141.5-58.5T280-480q0-83 58.5-141.5T480-680q83 0 141.5 58.5T680-480q0 83-58.5 141.5T480-280ZM80-440q-17 0-28.5-11.5T40-480q0-17 11.5-28.5T80-520h80q17 0 28.5 11.5T200-480q0 17-11.5 28.5T160-440H80Zm720 0q-17 0-28.5-11.5T760-480q0-17 11.5-28.5T800-520h80q17 0 28.5 11.5T920-480q0 17-11.5 28.5T880-440h-80ZM480-760q-17 0-28.5-11.5T440-800v-80q0-17 11.5-28.5T480-920q17 0 28.5 11.5T520-880v80q0 17-11.5 28.5T480-760Zm0 720q-17 0-28.5-11.5T440-80v-80q0-17 11.5-28.5T480-200q17 0 28.5 11.5T520-160v80q0 17-11.5 28.5T480-40ZM226-678l-43-42q-12-11-11.5-28t11.5-29q12-12 29-12t28 12l42 43q11 12 11 28t-11 28q-11 12-27.5 11.5T226-678Zm494 495-42-43q-11-12-11-28.5t11-27.5q11-12 27.5-11.5T734-282l43 42q12 11 11.5 28T777-183q-12 12-29 12t-28-12Zm-42-495q-12-11-11.5-27.5T678-734l42-43q11-12 28-11.5t29 11.5q12 12 12 29t-12 28l-43 42q-12 11-28 11t-28-11ZM183-183q-12-12-12-29t12-28l43-42q12-11 28.5-11t27.5 11q12 11 11.5 27.5T282-226l-42 43q-11 12-28 11.5T183-183Zm297-297Z"/></svg>
--- a/chrome-extension/web/src/system_mode.svg
+++ b/chrome-extension/web/src/system_mode.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#5f6368"><path d="M396-396q-32-32-58.5-67T289-537q-5 14-6.5 28.5T281-480q0 83 58 141t141 58q14 0 28.5-2t28.5-6q-39-22-74-48.5T396-396Zm85 196q-56 0-107-21t-91-61q-40-40-61-91t-21-107q0-51 17-97.5t50-84.5q13-14 32-9.5t27 24.5q21 55 52.5 104t73.5 91q42 42 91 73.5T648-326q20 8 24.5 27t-9.5 32q-38 33-84.5 50T481-200Zm223-192q-16-5-23-20.5t-4-32.5q9-48-6-94.5T621-621q-35-35-80.5-49.5T448-677q-17 3-32-4t-21-23q-6-16 1.5-31t23.5-19q69-15 138 4.5T679-678q51 51 71 120t5 138q-4 17-19 25t-32 3ZM480-840q-17 0-28.5-11.5T440-880v-40q0-17 11.5-28.5T480-960q17 0 28.5 11.5T520-920v40q0 17-11.5 28.5T480-840Zm0 840q-17 0-28.5-11.5T440-40v-40q0-17 11.5-28.5T480-120q17 0 28.5 11.5T520-80v40q0 17-11.5 28.5T480 0Zm255-734q-12-12-12-28.5t12-28.5l28-28q11-11 27.5-11t28.5 11q12 12 12 28.5T819-762l-28 28q-12 12-28 12t-28-12ZM141-141q-12-12-12-28.5t12-28.5l28-28q12-12 28-12t28 12q12 12 12 28.5T225-169l-28 28q-11 11-27.5 11T141-141Zm739-299q-17 0-28.5-11.5T840-480q0-17 11.5-28.5T880-520h40q17 0 28.5 11.5T960-480q0 17-11.5 28.5T920-440h-40Zm-840 0q-17 0-28.5-11.5T0-480q0-17 11.5-28.5T40-520h40q17 0 28.5 11.5T120-480q0 17-11.5 28.5T80-440H40Zm779 299q-12 12-28.5 12T762-141l-28-28q-12-12-12-28t12-28q12-12 28.5-12t28.5 12l28 28q11 11 11 27.5T819-141ZM226-735q-12 12-28.5 12T169-735l-28-28q-11-11-11-27.5t11-28.5q12-12 28.5-12t28.5 12l28 28q12 12 12 28t-12 28Zm170 339Z"/></svg>
--- a/whisperlivekit/simul_whisper/simul_whisper.py
+++ b/whisperlivekit/simul_whisper/simul_whisper.py
@@ -399,17 +399,17 @@ class PaddedAlignAttWhisper:
            mlx_mel_padded = mlx_log_mel_spectrogram(audio=input_segments.detach(), n_mels=self.model.dims.n_mels, padding=N_SAMPLES)
            mlx_mel = mlx_pad_or_trim(mlx_mel_padded, N_FRAMES, axis=-2)
            mlx_encoder_feature = self.mlx_encoder.encoder(mlx_mel[None])
-            encoder_feature = torch.as_tensor(mlx_encoder_feature)
+            encoder_feature = torch.tensor(np.array(mlx_encoder_feature))
            content_mel_len = int((mlx_mel_padded.shape[0] - mlx_mel.shape[0])/2)
-            device = encoder_feature.device #'cpu' is apple silicon
+            device = 'cpu'
        elif self.fw_encoder:
            audio_length_seconds = len(input_segments) / 16000   
            content_mel_len = int(audio_length_seconds * 100)//2      
            mel_padded_2 = self.fw_feature_extractor(waveform=input_segments.numpy(), padding=N_SAMPLES)[None, :]
            mel = fw_pad_or_trim(mel_padded_2, N_FRAMES, axis=-1)
            encoder_feature_ctranslate = self.fw_encoder.encode(mel)
-            encoder_feature = torch.as_tensor(encoder_feature_ctranslate)
-            device = encoder_feature.device
+            encoder_feature = torch.Tensor(np.array(encoder_feature_ctranslate))
+            device = 'cpu'
        else:
            # mel + padding to 30s
            mel_padded = log_mel_spectrogram(input_segments, n_mels=self.model.dims.n_mels, padding=N_SAMPLES,
				`@@ -0,0 +1 @@`
				`<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#5f6368"><path d="M480-120q-151 0-255.5-104.5T120-480q0-138 90-239.5T440-838q13-2 23 3.5t16 14.5q6 9 6.5 21t-7.5 23q-17 26-25.5 55t-8.5 61q0 90 63 153t153 63q31 0 61.5-9t54.5-25q11-7 22.5-6.5T819-479q10 5 15.5 15t3.5 24q-14 138-117.5 229T480-120Zm0-80q88 0 158-48.5T740-375q-20 5-40 8t-40 3q-123 0-209.5-86.5T364-660q0-20 3-40t8-40q-78 32-126.5 102T200-480q0 116 82 198t198 82Zm-10-270Z"/></svg>`
				`@@ -0,0 +1 @@`
				<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#5f6368"><path d="M480-360q50 0 85-35t35-85q0-50-35-85t-85-35q-50 0-85 35t-35 85q0 50 35 85t85 35Zm0 80q-83 0-141.5-58.5T280-480q0-83 58.5-141.5T480-680q83 0 141.5 58.5T680-480q0 83-58.5 141.5T480-280ZM80-440q-17 0-28.5-11.5T40-480q0-17 11.5-28.5T80-520h80q17 0 28.5 11.5T200-480q0 17-11.5 28.5T160-440H80Zm720 0q-17 0-28.5-11.5T760-480q0-17 11.5-28.5T800-520h80q17 0 28.5 11.5T920-480q0 17-11.5 28.5T880-440h-80ZM480-760q-17 0-28.5-11.5T440-800v-80q0-17 11.5-28.5T480-920q17 0 28.5 11.5T520-880v80q0 17-11.5 28.5T480-760Zm0 720q-17 0-28.5-11.5T440-80v-80q0-17 11.5-28.5T480-200q17 0 28.5 11.5T520-160v80q0 17-11.5 28.5T480-40ZM226-678l-43-42q-12-11-11.5-28t11.5-29q12-12 29-12t28 12l42 43q11 12 11 28t-11 28q-11 12-27.5 11.5T226-678Zm494 495-42-43q-11-12-11-28.5t11-27.5q11-12 27.5-11.5T734-282l43 42q12 11 11.5 28T777-183q-12 12-29 12t-28-12Zm-42-495q-12-11-11.5-27.5T678-734l42-43q11-12 28-11.5t29 11.5q12 12 12 29t-12 28l-43 42q-12 11-28 11t-28-11ZM183-183q-12-12-12-29t12-28l43-42q12-11 28.5-11t27.5 11q12 11 11.5 27.5T282-226l-42 43q-11 12-28 11.5T183-183Zm297-297Z"/></svg>