workaround to get the list of microphones in the extension

This commit is contained in:
Quentin Fuxa
2025-09-04 23:58:48 +02:00
parent f3ad4e39e4
commit ee02afec56
12 changed files with 122 additions and 599 deletions

View File

@@ -9,5 +9,9 @@ Capture the audio of your current tab, transcribe or translate it using Whisperl
## Devs:
- Impossible to capture audio from tabs if extension is a panel, unfortunately: https://issues.chromium.org/issues/40926394
- Impossible to capture audio from tabs if extension is a panel, unfortunately:
- https://issues.chromium.org/issues/40926394
- https://groups.google.com/a/chromium.org/g/chromium-extensions/c/DET2SXCFnDg
- https://issues.chromium.org/issues/40916430
- To capture microphone in an extension, there are tricks: https://github.com/justinmann/sidepanel-audio-issue , https://medium.com/@lynchee.owo/how-to-enable-microphone-access-in-chrome-extensions-by-code-924295170080 (comments)

View File

@@ -0,0 +1,9 @@
// Open the welcome page once, on first install only (not on extension or
// browser updates). The welcome page hosts the microphone-permission request.
chrome.runtime.onInstalled.addListener((details) => {
  // IDIOM FIX: the original used details.reason.search(/install/g) === -1,
  // a regex substring scan, for what is a simple equality check against the
  // OnInstalledReason value "install". No other reason contains "install",
  // so behavior is unchanged.
  if (details.reason !== "install") {
    return;
  }
  chrome.tabs.create({
    url: chrome.runtime.getURL("welcome.html"),
    active: true
  });
});

View File

@@ -1,315 +0,0 @@
// Deep-merges every source object's properties into `target` (later sources
// win) and returns `target`. Values whose typeof is "object" are merged
// recursively into a matching object on `target`; everything else is copied
// by assignment. NOTE: arrays and null also report typeof "object", so they
// are funneled through the recursive branch exactly as before.
const extend = function(target, ...sources) {
    for (let i = 0; i < sources.length; ++i) {
        const src = sources[i];
        // BUG FIX: `key` was previously undeclared (`for (key in src)`),
        // leaking an implicit global — and throwing outright in strict mode.
        for (const key in src) {
            const val = src[key];
            target[key] = typeof val === "object"
                ? extend(typeof target[key] === "object" ? target[key] : {}, val)
                : val;
        }
    }
    return target;
};
// Maps an encoding name to its worker script (resolved against `workerDir`).
const WORKER_FILE = {
wav: "WavWorker.js",
mp3: "Mp3Worker.js"
};
// Default recorder configuration; per-instance configs passed to the
// Recorder constructor are deep-merged over these values via extend().
const CONFIGS = {
workerDir: "/workers/", // worker scripts dir (end with /)
numChannels: 2, // number of channels
encoding: "wav", // encoding (can be changed at runtime via setEncoding)
// runtime options (may be overridden later through Recorder.setOptions)
options: {
timeLimit: 1200, // recording time limit (sec)
encodeAfterRecord: true, // process encoding after recording
progressInterval: 1000, // encoding progress report interval (millisec)
bufferSize: undefined, // buffer size (use browser default)
// encoding-specific options
wav: {
mimeType: "audio/wav"
},
mp3: {
mimeType: "audio/mpeg",
bitRate: 192 // (CBR only): bit rate = [64 .. 320]
}
}
};
// Captures PCM audio from a Web Audio source node via a ScriptProcessorNode
// and streams it to an encoder Worker (wav or mp3). Consumers override the
// on* hook methods at the bottom of the class to observe encoder lifecycle,
// progress, timeouts, and to collect the resulting Blob.
// NOTE(review): ScriptProcessorNode is deprecated in favour of AudioWorklet —
// kept here because the worker protocol depends on its buffer callbacks.
class Recorder {
constructor(source, configs) { //creates audio context from the source and connects it to the worker
extend(this, CONFIGS, configs || {});
this.context = source.context;
// Legacy fallback: old WebKit exposed createJavaScriptNode instead.
if (this.context.createScriptProcessor == null)
this.context.createScriptProcessor = this.context.createJavaScriptNode;
this.input = this.context.createGain();
source.connect(this.input);
this.buffer = [];
this.initWorker();
}
// Recording is in progress exactly while a processor node exists.
isRecording() {
return this.processor != null;
}
// Switch target encoding ("wav"/"mp3"); respawns the matching worker.
// Ignored while a recording is running or if the encoding is unchanged.
setEncoding(encoding) {
if(!this.isRecording() && this.encoding !== encoding) {
this.encoding = encoding;
this.initWorker();
}
}
// Deep-merge new runtime options and forward them to the worker.
// Ignored while a recording is running.
setOptions(options) {
if (!this.isRecording()) {
extend(this.options, options);
this.worker.postMessage({ command: "options", options: this.options});
}
}
// Creates the processor node and forwards the captured channel buffers to
// the worker on every audioprocess tick. No-op if already recording.
startRecording() {
if(!this.isRecording()) {
let numChannels = this.numChannels;
let buffer = this.buffer;
let worker = this.worker;
this.processor = this.context.createScriptProcessor(
this.options.bufferSize,
this.numChannels, this.numChannels);
this.input.connect(this.processor);
// A ScriptProcessorNode must be connected to a destination to fire events.
this.processor.connect(this.context.destination);
this.processor.onaudioprocess = function(event) {
for (var ch = 0; ch < numChannels; ++ch)
buffer[ch] = event.inputBuffer.getChannelData(ch);
worker.postMessage({ command: "record", buffer: buffer });
};
this.worker.postMessage({
command: "start",
bufferSize: this.processor.bufferSize
});
this.startTime = Date.now();
}
}
// Stops capturing and tells the worker to discard everything recorded.
cancelRecording() {
if(this.isRecording()) {
this.input.disconnect();
this.processor.disconnect();
delete this.processor;
this.worker.postMessage({ command: "cancel" });
}
}
// Stops capturing and tells the worker to encode what was recorded;
// onComplete fires when the worker posts back the resulting blob.
finishRecording() {
if (this.isRecording()) {
this.input.disconnect();
this.processor.disconnect();
delete this.processor;
this.worker.postMessage({ command: "finish" });
}
}
// Aborts a post-recording encode (only meaningful with encodeAfterRecord);
// resets the worker so a new session can start cleanly.
cancelEncoding() {
if (this.options.encodeAfterRecord)
if (!this.isRecording()) {
this.onEncodingCanceled(this);
this.initWorker();
}
}
// (Re)spawns the encoder worker for the current encoding and routes its
// messages to the matching on* hooks below.
initWorker() {
if (this.worker != null)
this.worker.terminate();
this.onEncoderLoading(this, this.encoding);
this.worker = new Worker(this.workerDir + WORKER_FILE[this.encoding]);
let _this = this;
this.worker.onmessage = function(event) {
let data = event.data;
switch (data.command) {
case "loaded":
_this.onEncoderLoaded(_this, _this.encoding);
break;
case "timeout":
_this.onTimeout(_this);
break;
case "progress":
_this.onEncodingProgress(_this, data.progress);
break;
case "complete":
_this.onComplete(_this, data.blob);
}
}
this.worker.postMessage({
command: "init",
config: {
sampleRate: this.context.sampleRate,
numChannels: this.numChannels
},
options: this.options
});
}
// Overridable lifecycle hooks (intentionally empty defaults).
onEncoderLoading(recorder, encoding) {}
onEncoderLoaded(recorder, encoding) {}
onTimeout(recorder) {}
onEncodingProgress(recorder, progress) {}
onEncodingCanceled(recorder) {}
onComplete(recorder, blob) {}
}
// Captures the current tab's audio with chrome.tabCapture, records it through
// Recorder, and on stop opens complete.html and hands it the encoded result.
// timeLimit is in milliseconds; format is "wav" or "mp3"; quality is the mp3
// bit rate; limitRemoved raises the cap to 3 hours. Stop/cancel arrive via
// keyboard commands or popup messages registered for the capture's lifetime.
const audioCapture = (timeLimit, muteTab, format, quality, limitRemoved) => {
chrome.tabCapture.capture({audio: true}, (stream) => { // sets up stream for capture
let startTabId; //tab when the capture is started
let timeout; // NOTE(review): never assigned or read — looks like dead code
let completeTabID; //tab when the capture is stopped
let audioURL = null; //resulting object when encoding is completed
chrome.tabs.query({active:true, currentWindow: true}, (tabs) => startTabId = tabs[0].id) //saves start tab
const liveStream = stream;
const audioCtx = new AudioContext();
const source = audioCtx.createMediaStreamSource(stream);
let mediaRecorder = new Recorder(source); //initiates the recorder based on the current stream
mediaRecorder.setEncoding(format); //sets encoding based on options
if(limitRemoved) { //removes time limit (3 h in seconds)
mediaRecorder.setOptions({timeLimit: 10800});
} else {
// Recorder expects seconds; the caller passes milliseconds.
mediaRecorder.setOptions({timeLimit: timeLimit/1000});
}
if(format === "mp3") {
mediaRecorder.setOptions({mp3: {bitRate: quality}});
}
mediaRecorder.startRecording();
function onStopCommand(command) { //keypress
if (command === "stop") {
stopCapture();
}
}
function onStopClick(request) { //click on popup
if(request === "stopCapture") {
stopCapture();
} else if (request === "cancelCapture") {
cancelCapture();
} else if (request.cancelEncodeID) {
// Only cancel encoding for the tab this capture belongs to.
if(request.cancelEncodeID === startTabId && mediaRecorder) {
mediaRecorder.cancelEncoding();
}
}
}
chrome.commands.onCommand.addListener(onStopCommand);
chrome.runtime.onMessage.addListener(onStopClick);
// Encoding finished: hand the blob URL to the completion tab (if open).
mediaRecorder.onComplete = (recorder, blob) => {
audioURL = window.URL.createObjectURL(blob);
if(completeTabID) {
chrome.tabs.sendMessage(completeTabID, {type: "encodingComplete", audioURL});
}
mediaRecorder = null;
}
mediaRecorder.onEncodingProgress = (recorder, progress) => {
if(completeTabID) {
chrome.tabs.sendMessage(completeTabID, {type: "encodingProgress", progress: progress});
}
}
// Finish recording, open complete.html, and tear the stream down. Only acts
// when invoked from the same tab the capture started on.
const stopCapture = function() {
let endTabId;
//check to make sure the current tab is the tab being captured
chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
endTabId = tabs[0].id;
if(mediaRecorder && startTabId === endTabId){
mediaRecorder.finishRecording();
chrome.tabs.create({url: "complete.html"}, (tab) => {
completeTabID = tab.id;
let completeCallback = () => {
chrome.tabs.sendMessage(tab.id, {type: "createTab", format: format, audioURL, startID: startTabId});
}
// Small delay so the completion page's listener is registered first.
setTimeout(completeCallback, 500);
});
closeStream(endTabId);
}
})
}
// Discard the recording entirely (no completion page is opened).
const cancelCapture = function() {
let endTabId;
chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
endTabId = tabs[0].id;
if(mediaRecorder && startTabId === endTabId){
mediaRecorder.cancelRecording();
closeStream(endTabId);
}
})
}
//removes the audio context and closes recorder to save memory
const closeStream = function(endTabId) {
chrome.commands.onCommand.removeListener(onStopCommand);
chrome.runtime.onMessage.removeListener(onStopClick);
mediaRecorder.onTimeout = () => {};
audioCtx.close();
liveStream.getAudioTracks()[0].stop();
sessionStorage.removeItem(endTabId);
chrome.runtime.sendMessage({captureStopped: endTabId});
}
// Hitting the time limit behaves like a manual stop.
mediaRecorder.onTimeout = stopCapture;
if(!muteTab) {
// Play the captured stream back so the tab stays audible to the user.
let audio = new Audio();
audio.srcObject = liveStream;
audio.play();
}
});
}
// Sends responses to and from the popup menu
// Popup queries: report whether a tab is being captured, or start a capture.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  if (request.currentTab) {
    // Reply with the stored capture-start timestamp for that tab, or false
    // when no capture is in progress for it.
    const startedAt = sessionStorage.getItem(request.currentTab);
    sendResponse(startedAt ? startedAt : false);
    return;
  }
  if (request === "startCapture") {
    startCapture();
  }
});
// Starts a capture on the active tab unless one is already running for it.
// Reads user options from sync storage, clamps the time limit to 20 minutes,
// and records the start timestamp under the tab id in sessionStorage.
const startCapture = function() {
  chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
    // CODE TO BLOCK CAPTURE ON YOUTUBE, DO NOT REMOVE
    // if(tabs[0].url.toLowerCase().includes("youtube")) {
    //   chrome.tabs.create({url: "error.html"});
    // } else {
    const tabId = tabs[0].id;
    if (sessionStorage.getItem(tabId)) {
      return; // a capture is already running for this tab
    }
    sessionStorage.setItem(tabId, Date.now());
    chrome.storage.sync.get({
      maxTime: 1200000,
      muteTab: false,
      format: "mp3",
      quality: 192,
      limitRemoved: false
    }, (options) => {
      // Cap the recording limit at 1200000 ms (20 minutes).
      const time = Math.min(options.maxTime, 1200000);
      audioCapture(time, options.muteTab, options.format, options.quality, options.limitRemoved);
    });
    chrome.runtime.sendMessage({captureStarted: tabId, startTime: Date.now()});
    // }
  });
};
// Keyboard shortcut: the "start" command kicks off a capture.
chrome.commands.onCommand.addListener((command) => {
  if (command !== "start") return;
  startCapture();
});

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 376 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 823 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@@ -34,6 +34,18 @@ const timerElement = document.querySelector(".timer");
// Theme radio group and the microphone <select> element from the popup UI.
const themeRadios = document.querySelectorAll('input[name="theme"]');
const microphoneSelect = document.getElementById("microphoneSelect");
// NOTE(review): chrome.runtime.onInstalled is documented to fire in the
// extension's background context; background.js registers this same handler,
// so verify this copy in the popup script is not dead code.
chrome.runtime.onInstalled.addListener((details) => {
  // IDIOM FIX: replaced details.reason.search(/install/g) === -1 with a
  // strict equality check — "install" is the only OnInstalledReason value
  // containing that substring, so behavior is unchanged.
  if (details.reason !== "install") {
    return;
  }
  chrome.tabs.create({
    url: chrome.runtime.getURL("welcome.html"),
    active: true
  });
});
function getWaveStroke() {
const styles = getComputedStyle(document.documentElement);
const v = styles.getPropertyValue("--wave-stroke").trim();
@@ -602,18 +614,47 @@ recordButton.addEventListener("click", toggleRecording);
// Rebind capture when the user picks a different microphone (the <select>
// may be absent from some pages, hence the optional chain).
microphoneSelect?.addEventListener("change", handleMicrophoneChange);
// document.addEventListener('DOMContentLoaded', async () => {
// try {
// await enumerateMicrophones();
// } catch (error) {
// console.log("Could not enumerate microphones on load:", error);
// }
// });
// navigator.mediaDevices.addEventListener('devicechange', async () => {
// console.log('Device change detected, re-enumerating microphones');
// try {
// await enumerateMicrophones();
// } catch (error) {
// console.log("Error re-enumerating microphones:", error);
// }
// });
// Populate the microphone list as soon as the popup's DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
  enumerateMicrophones().catch((error) => {
    console.log("Could not enumerate microphones on load:", error);
  });
});
// Refresh the microphone list whenever an audio device is (un)plugged.
navigator.mediaDevices.addEventListener('devicechange', () => {
  console.log('Device change detected, re-enumerating microphones');
  enumerateMicrophones().catch((error) => {
    console.log("Error re-enumerating microphones:", error);
  });
});
// Displays the current microphone-permission state in the popup. When the
// permission is not yet granted, opens welcome.html (which hosts the actual
// permission request) and polls every 100 ms until it is granted.
async function run() {
const micPermission = await navigator.permissions.query({
name: "microphone",
});
document.getElementById(
"audioPermission"
).innerText = `MICROPHONE: ${micPermission.state}`;
if (micPermission.state !== "granted") {
// The welcome page exists to work around permission prompts being
// blocked in extension pages (see welcome.html).
chrome.tabs.create({ url: "welcome.html" });
}
// NOTE(review): this interval never stops while permission stays ungranted —
// consider a timeout or listening to PermissionStatus.onchange instead.
const intervalId = setInterval(async () => {
const micPermission = await navigator.permissions.query({
name: "microphone",
});
if (micPermission.state === "granted") {
document.getElementById(
"audioPermission"
).innerText = `MICROPHONE: ${micPermission.state}`;
clearInterval(intervalId);
}
}, 100);
}
// Fire-and-forget: any error inside run() surfaces as an unhandled rejection.
void run();

View File

@@ -1,17 +1,37 @@
{
"manifest_version": 3,
"name": "WhisperLiveKit Tab Capture",
"version": "1.0",
"description": "Capture and transcribe audio from browser tabs using WhisperLiveKit.",
"action": {
"default_title": "WhisperLiveKit Tab Capture",
"default_popup": "popup.html"
},
"permissions": ["scripting", "tabCapture", "offscreen", "activeTab", "storage"],
"web_accessible_resources": [
{
"resources": ["requestPermissions.html", "requestPermissions.js"],
"matches": ["<all_urls>"]
}
]
}
"manifest_version": 3,
"name": "WhisperLiveKit Tab Capture",
"version": "1.0",
"description": "Capture and transcribe audio from browser tabs using WhisperLiveKit.",
"background": {
"service_worker": "background.js"
},
"icons": {
"16": "icons/icon16.png",
"32": "icons/icon32.png",
"48": "icons/icon48.png",
"128": "icons/icon128.png"
},
"action": {
"default_title": "WhisperLiveKit Tab Capture",
"default_popup": "popup.html"
},
"permissions": [
"scripting",
"tabCapture",
"offscreen",
"activeTab",
"storage"
],
"web_accessible_resources": [
{
"resources": [
"requestPermissions.html",
"requestPermissions.js"
],
"matches": [
"<all_urls>"
]
}
]
}

View File

@@ -9,6 +9,7 @@
</head>
<body>
<div id="audioPermission"></div>
<div class="settings-container">
<button id="recordButton">
<div class="shape-container">
@@ -28,12 +29,12 @@
<input id="websocketInput" type="text" placeholder="ws://host:port/asr" />
</div>
<!-- <div class="field">
<div class="field">
<label id="microphoneSelectLabel" for="microphoneSelect">Select Microphone</label>
<select id="microphoneSelect">
<option value="">Default Microphone</option>
</select>
</div> -->
</div>
<div class="theme-selector-container">
<div class="segmented" role="radiogroup" aria-label="Theme selector">
@@ -67,7 +68,7 @@
<div id="linesTranscript"></div>
<script src="/web/live_transcription.js"></script>
<script src="live_transcription.js"></script>
</body>
</html>

View File

@@ -1,249 +0,0 @@
// Startup log: confirms this background service worker was loaded.
console.log("Service worker loaded");
// Capture-session state shared by every listener in this worker.
let isRecording = false;
let currentStreamId = null;
// Install/update hook; currently only logs.
chrome.runtime.onInstalled.addListener((details) => {
console.log("Extension installed/updated");
});
// Toolbar-icon click handler: injects the popup-styling content script into
// whichever tab was active when the icon was clicked.
chrome.action.onClicked.addListener((tab) => {
  const id = tab.id;
  chrome.scripting.executeScript({
    target: { tabId: id },
    files: ['style_popup.js']
  });
  console.log(`Content script injected into tab ${id}`);
});
// Handle messages from popup
// Handles control messages from the popup.
// BUG FIX: the listener was declared `async`, so it returned a Promise —
// Chrome treats only a literal `true` return as "keep the sendResponse
// channel open", so responses could be dropped. The listener is now
// synchronous; the async work runs in an IIFE and `return true` is honored.
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
  console.log("Service worker received message:", message);
  (async () => {
    try {
      switch (message.type) {
        case 'start-capture': {
          const startResult = await startTabCapture(message.tabId, message.websocketUrl);
          sendResponse(startResult);
          break;
        }
        case 'stop-capture': {
          const stopResult = await stopTabCapture();
          sendResponse(stopResult);
          break;
        }
        case 'get-recording-state':
          sendResponse({ isRecording: isRecording });
          break;
        default:
          sendResponse({ success: false, error: 'Unknown message type' });
      }
    } catch (error) {
      console.error('Error handling message:', error);
      sendResponse({ success: false, error: error.message });
    }
  })();
  return true; // Keep message channel open for async response
});
/**
 * Starts capturing audio from a tab via an offscreen document.
 * Ensures the offscreen document exists (creating it if needed), obtains a
 * media stream ID for the tab, then messages the offscreen document to begin
 * recording, retrying up to 5 times while it initializes.
 * @param {number} tabId - id of the tab to capture.
 * @param {string} websocketUrl - transcription endpoint forwarded to the offscreen page.
 * @returns {Promise<{success: boolean, error?: string}>} outcome object; never throws.
 */
async function startTabCapture(tabId, websocketUrl) {
console.log('Service worker: Starting tab capture process...');
console.log('Service worker: tabId:', tabId, 'websocketUrl:', websocketUrl);
try {
if (isRecording) {
console.log('Service worker: Already recording, aborting');
return { success: false, error: 'Already recording' };
}
// Check if offscreen document exists
console.log('Service worker: Checking for existing offscreen document...');
const existingContexts = await chrome.runtime.getContexts({});
console.log('Service worker: Found contexts:', existingContexts.length);
const offscreenDocument = existingContexts.find(
(c) => c.contextType === 'OFFSCREEN_DOCUMENT'
);
console.log('Service worker: Offscreen document exists:', !!offscreenDocument);
// Create offscreen document if it doesn't exist
if (!offscreenDocument) {
console.log('Service worker: Creating offscreen document...');
try {
await chrome.offscreen.createDocument({
url: 'offscreen.html',
reasons: ['USER_MEDIA'],
justification: 'Capturing and processing tab audio for transcription'
});
console.log('Service worker: Offscreen document created successfully');
// Wait for offscreen document to initialize
console.log('Service worker: Waiting for offscreen document to initialize...');
await new Promise(resolve => setTimeout(resolve, 500));
console.log('Service worker: Offscreen document initialization delay complete');
} catch (offscreenError) {
console.error('Service worker: Failed to create offscreen document:', offscreenError);
return { success: false, error: 'Failed to create offscreen document: ' + offscreenError.message };
}
}
// Get media stream ID for the tab (stored module-wide for stop/cleanup).
console.log('Service worker: Getting media stream ID for tab:', tabId);
try {
currentStreamId = await chrome.tabCapture.getMediaStreamId({
targetTabId: tabId
});
console.log('Service worker: Media stream ID:', currentStreamId);
} catch (tabCaptureError) {
console.error('Service worker: Failed to get media stream ID:', tabCaptureError);
return { success: false, error: 'Failed to get media stream ID: ' + tabCaptureError.message };
}
if (!currentStreamId) {
console.log('Service worker: No media stream ID returned');
return { success: false, error: 'Failed to get media stream ID - no stream returned' };
}
// Send message to offscreen document to start capture with retry logic;
// a missing response means the offscreen listener is not ready yet.
console.log('Service worker: Sending start message to offscreen document...');
let response;
let retryCount = 0;
const maxRetries = 5;
while (!response && retryCount < maxRetries) {
try {
console.log(`Service worker: Attempt ${retryCount + 1}/${maxRetries} to communicate with offscreen document`);
// Send message to offscreen document without target property
response = await chrome.runtime.sendMessage({
type: 'start-recording',
target: 'offscreen',
data: {
streamId: currentStreamId,
websocketUrl: websocketUrl
}
});
if (!response) {
console.warn(`Service worker: No response from offscreen document, waiting before retry...`);
await new Promise(resolve => setTimeout(resolve, 200));
retryCount++;
} else {
console.log(`Service worker: Successfully communicated with offscreen document on attempt ${retryCount + 1}`);
}
} catch (sendError) {
console.error(`Service worker: Error sending message to offscreen document (attempt ${retryCount + 1}):`, sendError);
response = { success: false, error: 'Failed to communicate with offscreen document: ' + sendError.message };
break;
}
}
console.log('Service worker: Final offscreen document response:', response);
if (response && response.success) {
isRecording = true;
console.log('Service worker: Recording started successfully');
// Notify popup of state change
try {
chrome.runtime.sendMessage({
type: 'recording-state',
isRecording: true
});
} catch (e) {
console.warn('Service worker: Could not notify popup of state change:', e);
}
return { success: true };
} else {
console.log('Service worker: Offscreen document returned failure');
return { success: false, error: response?.error || 'Failed to start recording in offscreen document' };
}
} catch (error) {
console.error('Service worker: Exception in startTabCapture:', error);
return { success: false, error: 'Exception: ' + error.message };
}
}
/**
 * Stops an in-progress tab capture.
 * Tells the offscreen document to stop recording, resets module state, and
 * notifies the popup of the change.
 * @returns {Promise<{success: boolean, error?: string}>} outcome object; never throws.
 */
async function stopTabCapture() {
  try {
    if (!isRecording) {
      return { success: false, error: 'Not currently recording' };
    }
    // Send message to offscreen document to stop capture. Only completion
    // matters here — the previous unused `response` binding was removed.
    await chrome.runtime.sendMessage({
      type: 'stop-recording',
      target: 'offscreen'
    });
    isRecording = false;
    currentStreamId = null;
    // Notify popup of state change
    try {
      chrome.runtime.sendMessage({
        type: 'recording-state',
        isRecording: false
      });
    } catch (e) {
      // Popup might be closed, ignore error
    }
    return { success: true };
  } catch (error) {
    console.error('Error stopping tab capture:', error);
    // Reset state even on failure so a later start attempt can proceed.
    isRecording = false;
    currentStreamId = null;
    return { success: false, error: error.message };
  }
}
// Handle messages from offscreen document
// Messages addressed to this service worker by the offscreen document:
// recording stopped on its own, or failed with an error.
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
  if (message.target !== 'service-worker') {
    return;
  }
  if (message.type === 'recording-stopped') {
    isRecording = false;
    currentStreamId = null;
    // Tell the popup recording ended; it may be closed, so ignore failures.
    try {
      chrome.runtime.sendMessage({
        type: 'recording-state',
        isRecording: false
      });
    } catch (e) {
      // Popup might be closed, ignore error
    }
  } else if (message.type === 'recording-error') {
    isRecording = false;
    currentStreamId = null;
    // Surface the error to the popup; it may be closed, so ignore failures.
    try {
      chrome.runtime.sendMessage({
        type: 'status-update',
        status: 'error',
        message: message.error || 'Recording error occurred'
      });
    } catch (e) {
      // Popup might be closed, ignore error
    }
  }
});

View File

@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<title>Welcome</title>
<script src="welcome.js"></script>
</head>
<body>
This page exists to work around an issue with Chrome that blocks permission
requests from Chrome extensions.
<!-- <button id="requestMicrophone">Request Microphone</button> -->
</body>
</html>