mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-07 22:33:36 +00:00
v0.1.0 chrome extension
This commit is contained in:
13
chrome-extension/README.md
Normal file
13
chrome-extension/README.md
Normal file
@@ -0,0 +1,13 @@
|
||||
## WhisperLiveKit Chrome Extension v0.1.0
|
||||
Capture the audio of your current tab, transcribe or translate it using WhisperLiveKit. **Still unstable**
|
||||
|
||||
<img src="https://raw.githubusercontent.com/QuentinFuxa/WhisperLiveKit/refs/heads/main/chrome-extension/demo-extension.png" alt="WhisperLiveKit Demo" width="730">
|
||||
|
||||
## Running this extension
|
||||
1. Clone this repository.
|
||||
2. Load this directory in Chrome as an unpacked extension.
|
||||
|
||||
|
||||
## Devs:
|
||||
- Impossible to capture audio from tabs if the extension is a side panel, unfortunately: https://issues.chromium.org/issues/40926394
|
||||
- To capture microphone in an extension, there are tricks: https://github.com/justinmann/sidepanel-audio-issue , https://medium.com/@lynchee.owo/how-to-enable-microphone-access-in-chrome-extensions-by-code-924295170080 (comments)
|
||||
BIN
chrome-extension/demo-extension.png
Normal file
BIN
chrome-extension/demo-extension.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.2 MiB |
315
chrome-extension/example_tab_capture.js
Normal file
315
chrome-extension/example_tab_capture.js
Normal file
@@ -0,0 +1,315 @@
|
||||
/**
 * Deep-merges one or more source objects into a target object.
 * Nested objects are merged recursively; all other values overwrite.
 * @param {Object} target - object to merge into (mutated and returned).
 * @param {...Object} sources - objects whose properties are copied in.
 * @returns {Object} the mutated target.
 */
const extend = function() { //helper function to merge objects
  let target = arguments[0],
      sources = [].slice.call(arguments, 1);
  for (let i = 0; i < sources.length; ++i) {
    let src = sources[i];
    // BUG FIX: `key` was an undeclared implicit global — a ReferenceError
    // in strict mode and a cross-call clobbering hazard otherwise.
    for (const key in src) {
      let val = src[key];
      target[key] = typeof val === "object"
        ? extend(typeof target[key] === "object" ? target[key] : {}, val)
        : val;
    }
  }
  return target;
};
|
||||
|
||||
// Maps an encoding name to the worker script (resolved against
// CONFIGS.workerDir) that performs that encoding off the main thread.
const WORKER_FILE = {
  wav: "WavWorker.js",
  mp3: "Mp3Worker.js"
};
|
||||
|
||||
// Default recorder configuration. Instance-level `configs` passed to the
// Recorder constructor are deep-merged over these via extend().
const CONFIGS = {
  workerDir: "/workers/", // worker scripts dir (end with /)
  numChannels: 2, // number of channels
  encoding: "wav", // encoding (can be changed at runtime, see WORKER_FILE)

  // runtime options (forwarded to the encoder worker; see setOptions)
  options: {
    timeLimit: 1200, // recording time limit (sec)
    encodeAfterRecord: true, // process encoding after recording
    progressInterval: 1000, // encoding progress report interval (millisec)
    bufferSize: undefined, // buffer size (use browser default)

    // encoding-specific options
    wav: {
      mimeType: "audio/wav"
    },
    mp3: {
      mimeType: "audio/mpeg",
      bitRate: 192 // (CBR only): bit rate = [64 .. 320]
    }
  }
};
|
||||
|
||||
/**
 * Records audio from a Web Audio source node and streams the PCM frames to
 * an encoding web worker ("wav" or "mp3", see WORKER_FILE). Defaults come
 * from CONFIGS and may be overridden per instance.
 *
 * NOTE(review): built on ScriptProcessorNode, which is deprecated in favor
 * of AudioWorklet but still supported by Chrome — confirm before migrating.
 */
class Recorder {

  /**
   * @param {AudioNode} source - node to record (e.g. a MediaStreamAudioSourceNode).
   * @param {Object} [configs] - overrides deep-merged over CONFIGS via extend().
   */
  constructor(source, configs) { //creates audio context from the source and connects it to the worker
    extend(this, CONFIGS, configs || {});
    this.context = source.context;
    // Legacy WebKit fallback: createJavaScriptNode was the original name
    // of createScriptProcessor.
    if (this.context.createScriptProcessor == null)
      this.context.createScriptProcessor = this.context.createJavaScriptNode;
    this.input = this.context.createGain();
    source.connect(this.input);
    this.buffer = []; // per-channel sample arrays reused by onaudioprocess
    this.initWorker();
  }

  // Recording is "on" exactly while a ScriptProcessorNode exists; stop and
  // cancel both delete this.processor.
  isRecording() {
    return this.processor != null;
  }

  // Switch between "wav" and "mp3". Only allowed while idle; restarts the
  // worker so the matching encoder script is loaded.
  setEncoding(encoding) {
    if(!this.isRecording() && this.encoding !== encoding) {
      this.encoding = encoding;
      this.initWorker();
    }
  }

  // Merge new runtime options (timeLimit, bitRate, ...) into this.options
  // and forward them to the worker; ignored while recording.
  setOptions(options) {
    if (!this.isRecording()) {
      extend(this.options, options);
      this.worker.postMessage({ command: "options", options: this.options});
    }
  }

  // Create the ScriptProcessorNode, wire it into the graph, and start
  // shipping audio frames to the encoder worker. No-op if already recording.
  startRecording() {
    if(!this.isRecording()) {
      let numChannels = this.numChannels;
      let buffer = this.buffer;
      let worker = this.worker;
      this.processor = this.context.createScriptProcessor(
        this.options.bufferSize,
        this.numChannels, this.numChannels);
      this.input.connect(this.processor);
      // The processor must be connected to a destination or Chrome never
      // fires onaudioprocess.
      this.processor.connect(this.context.destination);
      this.processor.onaudioprocess = function(event) {
        // NOTE(review): getChannelData returns a live view; relying on
        // postMessage's synchronous structured clone to snapshot it —
        // confirm no copy (.slice()) is needed on target browsers.
        for (var ch = 0; ch < numChannels; ++ch)
          buffer[ch] = event.inputBuffer.getChannelData(ch);
        worker.postMessage({ command: "record", buffer: buffer });
      };
      this.worker.postMessage({
        command: "start",
        bufferSize: this.processor.bufferSize
      });
      this.startTime = Date.now();
    }
  }

  // Abort the current recording and discard captured audio.
  cancelRecording() {
    if(this.isRecording()) {
      this.input.disconnect();
      this.processor.disconnect();
      delete this.processor; // flips isRecording() to false
      this.worker.postMessage({ command: "cancel" });
    }
  }

  // Stop recording and ask the worker to encode what was captured; the
  // result arrives via the "complete" worker message (onComplete).
  finishRecording() {
    if (this.isRecording()) {
      this.input.disconnect();
      this.processor.disconnect();
      delete this.processor;
      this.worker.postMessage({ command: "finish" });
    }
  }

  // Abort a post-recording encode (only meaningful with encodeAfterRecord);
  // restarts the worker to drop the in-flight job.
  cancelEncoding() {
    if (this.options.encodeAfterRecord)
      if (!this.isRecording()) {
        this.onEncodingCanceled(this);
        this.initWorker();
      }
  }

  // (Re)create the encoder worker for the current encoding and route its
  // lifecycle messages to the on* callback hooks below.
  initWorker() {
    if (this.worker != null)
      this.worker.terminate();
    this.onEncoderLoading(this, this.encoding);
    this.worker = new Worker(this.workerDir + WORKER_FILE[this.encoding]);
    let _this = this;
    this.worker.onmessage = function(event) {
      let data = event.data;
      switch (data.command) {
        case "loaded":
          _this.onEncoderLoaded(_this, _this.encoding);
          break;
        case "timeout": // options.timeLimit reached
          _this.onTimeout(_this);
          break;
        case "progress":
          _this.onEncodingProgress(_this, data.progress);
          break;
        case "complete":
          _this.onComplete(_this, data.blob);
      }
    }
    this.worker.postMessage({
      command: "init",
      config: {
        sampleRate: this.context.sampleRate,
        numChannels: this.numChannels
      },
      options: this.options
    });
  }

  // Event hooks — no-ops by default, meant to be overwritten by the caller
  // (see audioCapture, which assigns onComplete / onEncodingProgress / onTimeout).
  onEncoderLoading(recorder, encoding) {}
  onEncoderLoaded(recorder, encoding) {}
  onTimeout(recorder) {}
  onEncodingProgress(recorder, progress) {}
  onEncodingCanceled(recorder) {}
  onComplete(recorder, blob) {}

}
|
||||
|
||||
/**
 * Captures the audio of the currently active tab and records it with a
 * Recorder; on completion, opens complete.html and hands it the blob URL.
 * @param {number} timeLimit - recording time limit in milliseconds.
 * @param {boolean} muteTab - when false, keep playing the captured audio locally.
 * @param {string} format - "wav" or "mp3" (see WORKER_FILE).
 * @param {number} quality - mp3 bit rate (used only when format === "mp3").
 * @param {boolean} limitRemoved - when true, use a fixed 10800 s (3 h) limit.
 */
const audioCapture = (timeLimit, muteTab, format, quality, limitRemoved) => {
  chrome.tabCapture.capture({audio: true}, (stream) => { // sets up stream for capture
    let startTabId; //tab when the capture is started
    let timeout;
    let completeTabID; //tab when the capture is stopped
    let audioURL = null; //resulting object when encoding is completed
    chrome.tabs.query({active:true, currentWindow: true}, (tabs) => startTabId = tabs[0].id) //saves start tab
    const liveStream = stream;
    const audioCtx = new AudioContext();
    const source = audioCtx.createMediaStreamSource(stream);
    let mediaRecorder = new Recorder(source); //initiates the recorder based on the current stream
    mediaRecorder.setEncoding(format); //sets encoding based on options
    if(limitRemoved) { //removes time limit
      mediaRecorder.setOptions({timeLimit: 10800});
    } else {
      // Recorder expects seconds; timeLimit arrives in milliseconds.
      mediaRecorder.setOptions({timeLimit: timeLimit/1000});
    }
    if(format === "mp3") {
      mediaRecorder.setOptions({mp3: {bitRate: quality}});
    }
    mediaRecorder.startRecording();

    function onStopCommand(command) { //keypress
      if (command === "stop") {
        stopCapture();
      }
    }
    function onStopClick(request) { //click on popup
      if(request === "stopCapture") {
        stopCapture();
      } else if (request === "cancelCapture") {
        cancelCapture();
      } else if (request.cancelEncodeID) {
        // Cancel encoding only for the tab this capture belongs to.
        if(request.cancelEncodeID === startTabId && mediaRecorder) {
          mediaRecorder.cancelEncoding();
        }
      }
    }
    chrome.commands.onCommand.addListener(onStopCommand);
    chrome.runtime.onMessage.addListener(onStopClick);
    // Encoding finished: publish the blob URL to the complete.html tab.
    mediaRecorder.onComplete = (recorder, blob) => {
      audioURL = window.URL.createObjectURL(blob);
      if(completeTabID) {
        chrome.tabs.sendMessage(completeTabID, {type: "encodingComplete", audioURL});
      }
      mediaRecorder = null;
    }
    mediaRecorder.onEncodingProgress = (recorder, progress) => {
      if(completeTabID) {
        chrome.tabs.sendMessage(completeTabID, {type: "encodingProgress", progress: progress});
      }
    }

    // Finish the recording (keeping audio) and open the completion page.
    const stopCapture = function() {
      let endTabId;
      //check to make sure the current tab is the tab being captured
      chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
        endTabId = tabs[0].id;
        if(mediaRecorder && startTabId === endTabId){
          mediaRecorder.finishRecording();
          chrome.tabs.create({url: "complete.html"}, (tab) => {
            completeTabID = tab.id;
            let completeCallback = () => {
              chrome.tabs.sendMessage(tab.id, {type: "createTab", format: format, audioURL, startID: startTabId});
            }
            // Small delay so complete.html has time to register its listener.
            setTimeout(completeCallback, 500);
          });
          closeStream(endTabId);
        }
      })
    }

    // Abort the recording, discarding captured audio.
    const cancelCapture = function() {
      let endTabId;
      chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
        endTabId = tabs[0].id;
        if(mediaRecorder && startTabId === endTabId){
          mediaRecorder.cancelRecording();
          closeStream(endTabId);
        }
      })
    }

    //removes the audio context and closes recorder to save memory
    const closeStream = function(endTabId) {
      chrome.commands.onCommand.removeListener(onStopCommand);
      chrome.runtime.onMessage.removeListener(onStopClick);
      mediaRecorder.onTimeout = () => {};
      audioCtx.close();
      liveStream.getAudioTracks()[0].stop();
      sessionStorage.removeItem(endTabId); // clears the "capturing" flag for this tab
      chrome.runtime.sendMessage({captureStopped: endTabId});
    }

    mediaRecorder.onTimeout = stopCapture; // auto-stop when timeLimit is hit

    // tabCapture silences the tab while captured; unless the user asked to
    // mute it, route the live stream back out through an <audio> element.
    if(!muteTab) {
      let audio = new Audio();
      audio.srcObject = liveStream;
      audio.play();
    }
  });
}
|
||||
|
||||
|
||||
|
||||
// Relays requests to and from the popup menu: reports whether the
// asked-about tab is currently being captured, or starts a new capture.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  if (request.currentTab) {
    // sessionStorage holds the capture start time keyed by tab id; reply
    // with it when present, otherwise signal "not capturing".
    const captureInfo = sessionStorage.getItem(request.currentTab);
    sendResponse(captureInfo ? captureInfo : false);
    return;
  }
  if (request === "startCapture") {
    startCapture();
  }
});
|
||||
|
||||
// Starts a capture of the active tab: marks the tab as "capturing" in
// sessionStorage, loads user options from chrome.storage.sync, and hands
// off to audioCapture(). No-op if the tab is already being captured.
const startCapture = function() {
  chrome.tabs.query({active: true, currentWindow: true}, (tabs) => {
    // CODE TO BLOCK CAPTURE ON YOUTUBE, DO NOT REMOVE
    // if(tabs[0].url.toLowerCase().includes("youtube")) {
    //   chrome.tabs.create({url: "error.html"});
    // } else {
      // sessionStorage keyed by tab id doubles as the capture-in-progress flag.
      if(!sessionStorage.getItem(tabs[0].id)) {
        sessionStorage.setItem(tabs[0].id, Date.now());
        // Second argument supplies defaults for unset options.
        chrome.storage.sync.get({
          maxTime: 1200000,
          muteTab: false,
          format: "mp3",
          quality: 192,
          limitRemoved: false
        }, (options) => {
          let time = options.maxTime;
          // Clamp to 20 min; limitRemoved overrides this inside audioCapture.
          if(time > 1200000) {
            time = 1200000
          }
          audioCapture(time, options.muteTab, options.format, options.quality, options.limitRemoved);
        });
        chrome.runtime.sendMessage({captureStarted: tabs[0].id, startTime: Date.now()});
      }
    // }
  });
};
|
||||
|
||||
|
||||
// Keyboard shortcut: the "start" command begins a capture of the active
// tab, mirroring the popup's start button.
chrome.commands.onCommand.addListener((command) => {
  if (command !== "start") {
    return;
  }
  startCapture();
});
|
||||
17
chrome-extension/manifest.json
Normal file
17
chrome-extension/manifest.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"manifest_version": 3,
|
||||
"name": "WhisperLiveKit Tab Capture",
|
||||
"version": "0.1.0",
|
||||
"description": "Capture and transcribe audio from browser tabs using WhisperLiveKit.",
|
||||
"action": {
|
||||
"default_title": "WhisperLiveKit Tab Capture",
|
||||
"default_popup": "popup.html"
|
||||
},
|
||||
"permissions": ["scripting", "tabCapture", "offscreen", "activeTab", "storage"],
|
||||
"web_accessible_resources": [
|
||||
{
|
||||
"resources": ["requestPermissions.html", "requestPermissions.js"],
|
||||
"matches": ["<all_urls>"]
|
||||
}
|
||||
]
|
||||
}
|
||||
73
chrome-extension/popup.html
Normal file
73
chrome-extension/popup.html
Normal file
@@ -0,0 +1,73 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>WhisperLiveKit</title>
|
||||
<link rel="stylesheet" href="/web/live_transcription.css" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="settings-container">
|
||||
<button id="recordButton">
|
||||
<div class="shape-container">
|
||||
<div class="shape"></div>
|
||||
</div>
|
||||
<div class="recording-info">
|
||||
<div class="wave-container">
|
||||
<canvas id="waveCanvas"></canvas>
|
||||
</div>
|
||||
<div class="timer">00:00</div>
|
||||
</div>
|
||||
</button>
|
||||
|
||||
<div class="settings">
|
||||
<div class="field">
|
||||
<label for="websocketInput">Websocket URL</label>
|
||||
<input id="websocketInput" type="text" placeholder="ws://host:port/asr" />
|
||||
</div>
|
||||
|
||||
<!-- <div class="field">
|
||||
<label id="microphoneSelectLabel" for="microphoneSelect">Select Microphone</label>
|
||||
<select id="microphoneSelect">
|
||||
<option value="">Default Microphone</option>
|
||||
</select>
|
||||
</div> -->
|
||||
|
||||
<div class="theme-selector-container">
|
||||
<div class="segmented" role="radiogroup" aria-label="Theme selector">
|
||||
<input type="radio" id="theme-system" name="theme" value="system" />
|
||||
<label for="theme-system" title="System">
|
||||
<img src="/web/src/system_mode.svg" alt="" />
|
||||
<span>System</span>
|
||||
</label>
|
||||
|
||||
<input type="radio" id="theme-light" name="theme" value="light" />
|
||||
<label for="theme-light" title="Light">
|
||||
<img src="/web/src/light_mode.svg" alt="" />
|
||||
<span>Light</span>
|
||||
</label>
|
||||
|
||||
<input type="radio" id="theme-dark" name="theme" value="dark" />
|
||||
<label for="theme-dark" title="Dark">
|
||||
<img src="/web/src/dark_mode.svg" alt="" />
|
||||
<span>Dark</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<p id="status"></p>
|
||||
|
||||
<div id="linesTranscript"></div>
|
||||
|
||||
<script src="/web/live_transcription.js"></script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
12
chrome-extension/requestPermissions.html
Normal file
12
chrome-extension/requestPermissions.html
Normal file
@@ -0,0 +1,12 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Request Permissions</title>
|
||||
<script src="requestPermissions.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
This page exists to work around an issue with Chrome that blocks permission
|
||||
requests from Chrome extensions.
|
||||
<button id="requestMicrophone">Request Microphone</button>
|
||||
</body>
|
||||
</html>
|
||||
17
chrome-extension/requestPermissions.js
Normal file
17
chrome-extension/requestPermissions.js
Normal file
@@ -0,0 +1,17 @@
|
||||
/**
 * Requests user permission for microphone access.
 * Triggers the browser prompt via getUserMedia, then closes this helper
 * page once the "microphone" permission reports as granted.
 * @returns {Promise<void>} A Promise that resolves when permission is granted or rejects with an error.
 */
async function getUserPermission() {
  console.log("Getting user permission for microphone access...");
  await navigator.mediaDevices.getUserMedia({ audio: true });
  const micPermission = await navigator.permissions.query({
    name: "microphone",
  });
  // FIX: strict equality instead of `==`.
  if (micPermission.state === "granted") {
    window.close();
  }
}

// Call the function to request microphone permission.
// FIX: a denied prompt previously surfaced as an unhandled promise
// rejection — log it explicitly instead.
getUserPermission().catch((err) => {
  console.error("Microphone permission request failed:", err);
});
|
||||
249
chrome-extension/service-worker.js
Normal file
249
chrome-extension/service-worker.js
Normal file
@@ -0,0 +1,249 @@
|
||||
// Service worker for the WhisperLiveKit tab-capture extension: owns the
// recording state and brokers messages between the popup and the
// offscreen document that performs the actual audio capture.
console.log("Service worker loaded");

// True while the offscreen document is capturing tab audio.
let isRecording = false;
// Media stream id from chrome.tabCapture for the current capture (null when idle).
let currentStreamId = null;

chrome.runtime.onInstalled.addListener((details) => {
  console.log("Extension installed/updated");
});
|
||||
|
||||
// Toolbar-icon click: inject the popup-styling content script into the
// tab that was active when the icon was clicked.
chrome.action.onClicked.addListener((tab) => {
  const tabId = tab.id;

  chrome.scripting.executeScript({
    target: { tabId },
    files: ['style_popup.js'],
  });

  console.log(`Content script injected into tab ${tabId}`);
});
|
||||
|
||||
|
||||
// Handle messages from popup.
// BUG FIX: the listener itself must NOT be async — an async listener
// returns a Promise to addListener, and in Manifest V3 that does not keep
// the sendResponse channel open (the `return true` inside an async
// function is swallowed by the Promise). Run the async work in an IIFE
// inside a synchronous listener and return true from the listener itself.
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
  console.log("Service worker received message:", message);

  (async () => {
    try {
      switch (message.type) {
        case 'start-capture': {
          const startResult = await startTabCapture(message.tabId, message.websocketUrl);
          sendResponse(startResult);
          break;
        }

        case 'stop-capture': {
          const stopResult = await stopTabCapture();
          sendResponse(stopResult);
          break;
        }

        case 'get-recording-state':
          sendResponse({ isRecording: isRecording });
          break;

        default:
          sendResponse({ success: false, error: 'Unknown message type' });
      }
    } catch (error) {
      console.error('Error handling message:', error);
      sendResponse({ success: false, error: error.message });
    }
  })();

  return true; // Keep message channel open for async response
});
|
||||
|
||||
/**
 * Creates the offscreen document if needed, obtains a tabCapture media
 * stream id for `tabId`, and tells the offscreen document to start
 * recording and stream audio to `websocketUrl`.
 * Errors are returned in the result object, never thrown to the caller.
 * @param {number} tabId - id of the tab whose audio should be captured.
 * @param {string} websocketUrl - WhisperLiveKit ASR websocket endpoint.
 * @returns {Promise<{success: boolean, error?: string}>}
 */
async function startTabCapture(tabId, websocketUrl) {
  console.log('Service worker: Starting tab capture process...');
  console.log('Service worker: tabId:', tabId, 'websocketUrl:', websocketUrl);

  try {
    if (isRecording) {
      console.log('Service worker: Already recording, aborting');
      return { success: false, error: 'Already recording' };
    }

    // Check if offscreen document exists
    console.log('Service worker: Checking for existing offscreen document...');
    const existingContexts = await chrome.runtime.getContexts({});
    console.log('Service worker: Found contexts:', existingContexts.length);

    const offscreenDocument = existingContexts.find(
      (c) => c.contextType === 'OFFSCREEN_DOCUMENT'
    );

    console.log('Service worker: Offscreen document exists:', !!offscreenDocument);

    // Create offscreen document if it doesn't exist
    if (!offscreenDocument) {
      console.log('Service worker: Creating offscreen document...');
      try {
        await chrome.offscreen.createDocument({
          url: 'offscreen.html',
          reasons: ['USER_MEDIA'],
          justification: 'Capturing and processing tab audio for transcription'
        });
        console.log('Service worker: Offscreen document created successfully');

        // Wait for offscreen document to initialize
        // NOTE(review): fixed 500 ms delay is a race; a ready handshake from
        // the offscreen document would be more robust — confirm before changing.
        console.log('Service worker: Waiting for offscreen document to initialize...');
        await new Promise(resolve => setTimeout(resolve, 500));
        console.log('Service worker: Offscreen document initialization delay complete');

      } catch (offscreenError) {
        console.error('Service worker: Failed to create offscreen document:', offscreenError);
        return { success: false, error: 'Failed to create offscreen document: ' + offscreenError.message };
      }
    }

    // Get media stream ID for the tab
    console.log('Service worker: Getting media stream ID for tab:', tabId);
    try {
      currentStreamId = await chrome.tabCapture.getMediaStreamId({
        targetTabId: tabId
      });
      console.log('Service worker: Media stream ID:', currentStreamId);
    } catch (tabCaptureError) {
      console.error('Service worker: Failed to get media stream ID:', tabCaptureError);
      return { success: false, error: 'Failed to get media stream ID: ' + tabCaptureError.message };
    }

    if (!currentStreamId) {
      console.log('Service worker: No media stream ID returned');
      return { success: false, error: 'Failed to get media stream ID - no stream returned' };
    }

    // Send message to offscreen document to start capture with retry logic
    console.log('Service worker: Sending start message to offscreen document...');

    let response;
    let retryCount = 0;
    const maxRetries = 5;

    // Retry because the offscreen document may not have registered its
    // onMessage listener yet (sendMessage then resolves with no response).
    while (!response && retryCount < maxRetries) {
      try {
        console.log(`Service worker: Attempt ${retryCount + 1}/${maxRetries} to communicate with offscreen document`);

        // Send the start request to the offscreen document (target: 'offscreen')
        response = await chrome.runtime.sendMessage({
          type: 'start-recording',
          target: 'offscreen',
          data: {
            streamId: currentStreamId,
            websocketUrl: websocketUrl
          }
        });

        if (!response) {
          console.warn(`Service worker: No response from offscreen document, waiting before retry...`);
          await new Promise(resolve => setTimeout(resolve, 200));
          retryCount++;
        } else {
          console.log(`Service worker: Successfully communicated with offscreen document on attempt ${retryCount + 1}`);
        }
      } catch (sendError) {
        console.error(`Service worker: Error sending message to offscreen document (attempt ${retryCount + 1}):`, sendError);
        response = { success: false, error: 'Failed to communicate with offscreen document: ' + sendError.message };
        break;
      }
    }

    console.log('Service worker: Final offscreen document response:', response);

    if (response && response.success) {
      isRecording = true;
      console.log('Service worker: Recording started successfully');

      // Notify popup of state change
      try {
        chrome.runtime.sendMessage({
          type: 'recording-state',
          isRecording: true
        });
      } catch (e) {
        console.warn('Service worker: Could not notify popup of state change:', e);
      }

      return { success: true };
    } else {
      console.log('Service worker: Offscreen document returned failure');
      return { success: false, error: response?.error || 'Failed to start recording in offscreen document' };
    }

  } catch (error) {
    console.error('Service worker: Exception in startTabCapture:', error);
    return { success: false, error: 'Exception: ' + error.message };
  }
}
|
||||
|
||||
/**
 * Tells the offscreen document to stop recording and resets local state.
 * State is reset even when messaging fails, so the service worker cannot
 * get stuck in a phantom "recording" state.
 * @returns {Promise<{success: boolean, error?: string}>}
 */
async function stopTabCapture() {
  try {
    if (!isRecording) {
      return { success: false, error: 'Not currently recording' };
    }

    // Send message to offscreen document to stop capture
    // (the response is awaited but intentionally unused beyond sequencing).
    const response = await chrome.runtime.sendMessage({
      type: 'stop-recording',
      target: 'offscreen'
    });

    isRecording = false;
    currentStreamId = null;

    // Notify popup of state change
    try {
      chrome.runtime.sendMessage({
        type: 'recording-state',
        isRecording: false
      });
    } catch (e) {
      // Popup might be closed, ignore error
    }

    return { success: true };

  } catch (error) {
    console.error('Error stopping tab capture:', error);
    // Reset state regardless so a later start-capture isn't blocked.
    isRecording = false;
    currentStreamId = null;
    return { success: false, error: error.message };
  }
}
|
||||
|
||||
// Handle messages from offscreen document. The offscreen side reports
// state changes back with target: 'service-worker'; this keeps the
// service worker's isRecording flag in sync and forwards status to the popup.
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
  if (message.target === 'service-worker') {
    switch (message.type) {
      case 'recording-stopped':
        // Capture ended on the offscreen side (e.g. the stream closed).
        isRecording = false;
        currentStreamId = null;

        // Notify popup
        try {
          chrome.runtime.sendMessage({
            type: 'recording-state',
            isRecording: false
          });
        } catch (e) {
          // Popup might be closed, ignore error
        }
        break;

      case 'recording-error':
        isRecording = false;
        currentStreamId = null;

        // Notify popup
        try {
          chrome.runtime.sendMessage({
            type: 'status-update',
            status: 'error',
            message: message.error || 'Recording error occurred'
          });
        } catch (e) {
          // Popup might be closed, ignore error
        }
        break;
    }
  }
});
|
||||
29
chrome-extension/sidepanel.js
Normal file
29
chrome-extension/sidepanel.js
Normal file
@@ -0,0 +1,29 @@
|
||||
console.log("sidepanel.js");

/**
 * Shows the current microphone permission state in the side panel and, if
 * not yet granted, opens the helper page that can actually display the
 * permission prompt (side panels cannot prompt themselves).
 */
async function run() {
  const micPermission = await navigator.permissions.query({
    name: "microphone",
  });

  // Render the permission state into the panel.
  const showState = (state) => {
    document.getElementById(
      "audioPermission"
    ).innerText = `MICROPHONE: ${state}`;
  };

  showState(micPermission.state);

  if (micPermission.state !== "granted") {
    chrome.tabs.create({ url: "requestPermissions.html" });

    // IMPROVEMENT: listen for the PermissionStatus "change" event instead
    // of re-querying navigator.permissions every 100 ms with setInterval.
    const onChange = () => {
      if (micPermission.state === "granted") {
        showState(micPermission.state);
        micPermission.removeEventListener("change", onChange);
      }
    };
    micPermission.addEventListener("change", onChange);
  }
}

void run();
|
||||
469
chrome-extension/web/live_transcription.css
Normal file
469
chrome-extension/web/live_transcription.css
Normal file
@@ -0,0 +1,469 @@
|
||||
:root {
|
||||
--bg: #ffffff;
|
||||
--text: #111111;
|
||||
--muted: #666666;
|
||||
--border: #e5e5e5;
|
||||
--chip-bg: rgba(0, 0, 0, 0.04);
|
||||
--chip-text: #000000;
|
||||
--spinner-border: #8d8d8d5c;
|
||||
--spinner-top: #b0b0b0;
|
||||
--silence-bg: #f3f3f3;
|
||||
--loading-bg: rgba(255, 77, 77, 0.06);
|
||||
--button-bg: #ffffff;
|
||||
--button-border: #e9e9e9;
|
||||
--wave-stroke: #000000;
|
||||
--label-dia-text: #868686;
|
||||
--label-trans-text: #111111;
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
:root:not([data-theme="light"]) {
|
||||
--bg: #0b0b0b;
|
||||
--text: #e6e6e6;
|
||||
--muted: #9aa0a6;
|
||||
--border: #333333;
|
||||
--chip-bg: rgba(255, 255, 255, 0.08);
|
||||
--chip-text: #e6e6e6;
|
||||
--spinner-border: #555555;
|
||||
--spinner-top: #dddddd;
|
||||
--silence-bg: #1a1a1a;
|
||||
--loading-bg: rgba(255, 77, 77, 0.12);
|
||||
--button-bg: #111111;
|
||||
--button-border: #333333;
|
||||
--wave-stroke: #e6e6e6;
|
||||
--label-dia-text: #b3b3b3;
|
||||
--label-trans-text: #ffffff;
|
||||
}
|
||||
}
|
||||
|
||||
:root[data-theme="dark"] {
|
||||
--bg: #0b0b0b;
|
||||
--text: #e6e6e6;
|
||||
--muted: #9aa0a6;
|
||||
--border: #333333;
|
||||
--chip-bg: rgba(255, 255, 255, 0.08);
|
||||
--chip-text: #e6e6e6;
|
||||
--spinner-border: #555555;
|
||||
--spinner-top: #dddddd;
|
||||
--silence-bg: #1a1a1a;
|
||||
--loading-bg: rgba(255, 77, 77, 0.12);
|
||||
--button-bg: #111111;
|
||||
--button-border: #333333;
|
||||
--wave-stroke: #e6e6e6;
|
||||
--label-dia-text: #b3b3b3;
|
||||
--label-trans-text: #ffffff;
|
||||
}
|
||||
|
||||
:root[data-theme="light"] {
|
||||
--bg: #ffffff;
|
||||
--text: #111111;
|
||||
--muted: #666666;
|
||||
--border: #e5e5e5;
|
||||
--chip-bg: rgba(0, 0, 0, 0.04);
|
||||
--chip-text: #000000;
|
||||
--spinner-border: #8d8d8d5c;
|
||||
--spinner-top: #b0b0b0;
|
||||
--silence-bg: #f3f3f3;
|
||||
--loading-bg: rgba(255, 77, 77, 0.06);
|
||||
--button-bg: #ffffff;
|
||||
--button-border: #e9e9e9;
|
||||
--wave-stroke: #000000;
|
||||
--label-dia-text: #868686;
|
||||
--label-trans-text: #111111;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: ui-sans-serif, system-ui, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol', 'Noto Color Emoji';
|
||||
margin: 20px;
|
||||
text-align: center;
|
||||
background-color: var(--bg);
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
/* Record button */
|
||||
#recordButton {
|
||||
width: 50px;
|
||||
height: 50px;
|
||||
border: none;
|
||||
border-radius: 50%;
|
||||
background-color: var(--button-bg);
|
||||
cursor: pointer;
|
||||
transition: all 0.3s ease;
|
||||
border: 1px solid var(--button-border);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
#recordButton.recording {
|
||||
width: 180px;
|
||||
border-radius: 40px;
|
||||
justify-content: flex-start;
|
||||
padding-left: 20px;
|
||||
}
|
||||
|
||||
#recordButton:active {
|
||||
transform: scale(0.95);
|
||||
}
|
||||
|
||||
.shape-container {
|
||||
width: 25px;
|
||||
height: 25px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.shape {
|
||||
width: 25px;
|
||||
height: 25px;
|
||||
background-color: rgb(209, 61, 53);
|
||||
border-radius: 50%;
|
||||
transition: all 0.3s ease;
|
||||
}
|
||||
|
||||
#recordButton:disabled .shape {
|
||||
background-color: #6e6d6d;
|
||||
}
|
||||
|
||||
#recordButton.recording .shape {
|
||||
border-radius: 5px;
|
||||
width: 25px;
|
||||
height: 25px;
|
||||
}
|
||||
|
||||
/* Recording elements */
|
||||
.recording-info {
|
||||
display: none;
|
||||
align-items: center;
|
||||
margin-left: 15px;
|
||||
flex-grow: 1;
|
||||
}
|
||||
|
||||
#recordButton.recording .recording-info {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.wave-container {
|
||||
width: 60px;
|
||||
height: 30px;
|
||||
position: relative;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
#waveCanvas {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.timer {
|
||||
font-size: 14px;
|
||||
font-weight: 500;
|
||||
color: var(--text);
|
||||
margin-left: 10px;
|
||||
}
|
||||
|
||||
#status {
|
||||
margin-top: 20px;
|
||||
font-size: 16px;
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
/* Settings */
|
||||
.settings-container {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
gap: 15px;
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
.settings {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
align-items: flex-start;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.field {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: flex-start;
|
||||
gap: 3px;
|
||||
}
|
||||
|
||||
#chunkSelector,
|
||||
#websocketInput,
|
||||
#themeSelector,
|
||||
#microphoneSelect {
|
||||
font-size: 16px;
|
||||
padding: 5px 8px;
|
||||
border-radius: 8px;
|
||||
border: 1px solid var(--border);
|
||||
background-color: var(--button-bg);
|
||||
color: var(--text);
|
||||
max-height: 30px;
|
||||
}
|
||||
|
||||
#microphoneSelect {
|
||||
width: 100%;
|
||||
max-width: 190px;
|
||||
min-width: 120px;
|
||||
}
|
||||
|
||||
#chunkSelector:focus,
|
||||
#websocketInput:focus,
|
||||
#themeSelector:focus,
|
||||
#microphoneSelect:focus {
|
||||
outline: none;
|
||||
border-color: #007bff;
|
||||
box-shadow: 0 0 0 3px rgba(0, 123, 255, 0.15);
|
||||
}
|
||||
|
||||
label {
|
||||
font-size: 13px;
|
||||
color: var(--muted);
|
||||
}
|
||||
|
||||
.ws-default {
|
||||
font-size: 12px;
|
||||
color: var(--muted);
|
||||
}
|
||||
|
||||
/* Segmented pill control for Theme */
|
||||
.segmented {
|
||||
display: inline-flex;
|
||||
align-items: stretch;
|
||||
border: 1px solid var(--button-border);
|
||||
background-color: var(--button-bg);
|
||||
border-radius: 999px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.segmented input[type="radio"] {
|
||||
position: absolute;
|
||||
opacity: 0;
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
.theme-selector-container {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
margin-top: 17px;
|
||||
}
|
||||
|
||||
.segmented label {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
padding: 6px 12px;
|
||||
font-size: 14px;
|
||||
color: var(--muted);
|
||||
cursor: pointer;
|
||||
user-select: none;
|
||||
transition: background-color 0.2s ease, color 0.2s ease;
|
||||
}
|
||||
|
||||
.segmented label span {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.segmented label:hover span {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
.segmented label:hover {
|
||||
background-color: var(--chip-bg);
|
||||
}
|
||||
|
||||
.segmented img {
|
||||
width: 16px;
|
||||
height: 16px;
|
||||
}
|
||||
|
||||
.segmented input[type="radio"]:checked + label {
|
||||
background-color: var(--chip-bg);
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
.segmented input[type="radio"]:focus-visible + label,
|
||||
.segmented input[type="radio"]:focus + label {
|
||||
outline: 2px solid #007bff;
|
||||
outline-offset: 2px;
|
||||
border-radius: 999px;
|
||||
}
|
||||
|
||||
/* Transcript area */
|
||||
#linesTranscript {
|
||||
margin: 20px auto;
|
||||
max-width: 700px;
|
||||
text-align: left;
|
||||
font-size: 16px;
|
||||
}
|
||||
|
||||
#linesTranscript p {
|
||||
margin: 0px 0;
|
||||
}
|
||||
|
||||
#linesTranscript strong {
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
#speaker {
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 100px;
|
||||
padding: 2px 10px;
|
||||
font-size: 14px;
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
|
||||
.label_diarization {
|
||||
background-color: var(--chip-bg);
|
||||
border-radius: 8px 8px 8px 8px;
|
||||
padding: 2px 10px;
|
||||
margin-left: 10px;
|
||||
display: inline-block;
|
||||
white-space: nowrap;
|
||||
font-size: 14px;
|
||||
margin-bottom: 0px;
|
||||
color: var(--label-dia-text);
|
||||
}
|
||||
|
||||
.label_transcription {
|
||||
background-color: var(--chip-bg);
|
||||
border-radius: 8px 8px 8px 8px;
|
||||
padding: 2px 10px;
|
||||
display: inline-block;
|
||||
white-space: nowrap;
|
||||
margin-left: 10px;
|
||||
font-size: 14px;
|
||||
margin-bottom: 0px;
|
||||
color: var(--label-trans-text);
|
||||
}
|
||||
|
||||
#timeInfo {
|
||||
color: var(--muted);
|
||||
margin-left: 10px;
|
||||
}
|
||||
|
||||
.textcontent {
|
||||
font-size: 16px;
|
||||
padding-left: 10px;
|
||||
margin-bottom: 10px;
|
||||
margin-top: 1px;
|
||||
padding-top: 5px;
|
||||
border-radius: 0px 0px 0px 10px;
|
||||
}
|
||||
|
||||
.buffer_diarization {
|
||||
color: var(--label-dia-text);
|
||||
margin-left: 4px;
|
||||
}
|
||||
|
||||
.buffer_transcription {
|
||||
color: #7474748c;
|
||||
margin-left: 4px;
|
||||
}
|
||||
|
||||
.spinner {
|
||||
display: inline-block;
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border: 2px solid var(--spinner-border);
|
||||
border-top: 2px solid var(--spinner-top);
|
||||
border-radius: 50%;
|
||||
animation: spin 0.7s linear infinite;
|
||||
vertical-align: middle;
|
||||
margin-bottom: 2px;
|
||||
margin-right: 5px;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
to {
|
||||
transform: rotate(360deg);
|
||||
}
|
||||
}
|
||||
|
||||
.silence {
|
||||
color: var(--muted);
|
||||
background-color: var(--silence-bg);
|
||||
font-size: 13px;
|
||||
border-radius: 30px;
|
||||
padding: 2px 10px;
|
||||
}
|
||||
|
||||
.loading {
|
||||
color: var(--muted);
|
||||
background-color: var(--loading-bg);
|
||||
border-radius: 8px 8px 8px 0px;
|
||||
padding: 2px 10px;
|
||||
font-size: 14px;
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
|
||||
/* for smaller screens */
|
||||
@media (max-width: 768px) {
|
||||
.settings-container {
|
||||
flex-direction: column;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.settings {
|
||||
justify-content: center;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.field {
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
#websocketInput,
|
||||
#microphoneSelect {
|
||||
min-width: 200px;
|
||||
max-width: 400px;
|
||||
}
|
||||
|
||||
.theme-selector-container {
|
||||
margin-top: 10px;
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
body {
|
||||
margin: 10px;
|
||||
}
|
||||
|
||||
.settings {
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
#websocketInput,
|
||||
#microphoneSelect {
|
||||
max-width: 400px;
|
||||
}
|
||||
|
||||
.segmented label {
|
||||
padding: 4px 8px;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.segmented img {
|
||||
width: 14px;
|
||||
height: 14px;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
html
|
||||
{
|
||||
width: 400px; /* max: 800px */
|
||||
height: 600px; /* max: 600px */
|
||||
border-radius: 10px;
|
||||
|
||||
}
|
||||
619
chrome-extension/web/live_transcription.js
Normal file
619
chrome-extension/web/live_transcription.js
Normal file
@@ -0,0 +1,619 @@
|
||||
/* Theme, WebSocket, recording, rendering logic extracted from inline script and adapted for segmented theme control and WS caption */
// --- Session state ---
let isRecording = false;            // true while audio is being captured
let websocket = null;               // active WebSocket, or null when disconnected
let recorder = null;                // MediaRecorder streaming webm chunks to the server
let chunkDuration = 100;            // MediaRecorder timeslice in milliseconds
let websocketUrl = "ws://localhost:8000/asr"; // default WhisperLiveKit endpoint
let userClosing = false;            // distinguishes our own close from a server drop
let wakeLock = null;                // screen wake lock held while recording (best effort)
let startTime = null;               // epoch ms when recording started (drives the timer)
let timerInterval = null;           // setInterval handle for the elapsed-time display
// --- Waveform visualization ---
let audioContext = null;
let analyser = null;
let microphone = null;              // MediaStreamSource feeding the analyser
let waveCanvas = document.getElementById("waveCanvas");
let waveCtx = waveCanvas.getContext("2d");
let animationFrame = null;          // requestAnimationFrame handle for drawWaveform
// --- Stop/finalize handshake ---
let waitingForStop = false;         // true between stop click and server "ready_to_stop"
let lastReceivedData = null;        // last server payload, re-rendered on finalize
let lastSignature = null;           // render de-duplication signature (see renderLinesWithBuffer)
// --- Microphone selection ---
let availableMicrophones = [];
let selectedMicrophoneId = null;    // deviceId chosen in the select, null = default

// Scale the canvas backing store for high-DPI displays so strokes stay crisp.
waveCanvas.width = 60 * (window.devicePixelRatio || 1);
waveCanvas.height = 30 * (window.devicePixelRatio || 1);
waveCtx.scale(window.devicePixelRatio || 1, window.devicePixelRatio || 1);

// --- DOM references ---
const statusText = document.getElementById("status");
const recordButton = document.getElementById("recordButton");
const chunkSelector = document.getElementById("chunkSelector");
const websocketInput = document.getElementById("websocketInput");
const websocketDefaultSpan = document.getElementById("wsDefaultUrl");
const linesTranscriptDiv = document.getElementById("linesTranscript");
const timerElement = document.querySelector(".timer");
const themeRadios = document.querySelectorAll('input[name="theme"]');
const microphoneSelect = document.getElementById("microphoneSelect");
||||
|
||||
// Resolve the current waveform stroke color from the --wave-stroke CSS
// variable, falling back to black when the variable is unset.
function getWaveStroke() {
  const rootStyles = getComputedStyle(document.documentElement);
  const stroke = rootStyles.getPropertyValue("--wave-stroke").trim();
  if (stroke) {
    return stroke;
  }
  return "#000";
}
|
||||
|
||||
// Cached stroke color for the canvas waveform. Canvas drawing cannot read CSS
// variables live, so we snapshot the value and refresh it on theme changes.
let waveStroke = getWaveStroke();
function updateWaveStroke() {
  waveStroke = getWaveStroke();
}
|
||||
|
||||
// Apply a theme preference ("light" | "dark" | anything else = system).
// Explicit themes set data-theme on <html>; "system" removes the attribute so
// CSS media queries take over. The waveform stroke cache is refreshed too.
function applyTheme(pref) {
  const root = document.documentElement;
  if (pref === "light" || pref === "dark") {
    root.setAttribute("data-theme", pref);
  } else {
    root.removeAttribute("data-theme");
  }
  updateWaveStroke();
}
|
||||
|
||||
// Persisted theme preference
// Restore the saved preference ("light" | "dark" | "system"), keep the
// segmented radio control in sync, and write changes back to localStorage.
const savedThemePref = localStorage.getItem("themePreference") || "system";
applyTheme(savedThemePref);
if (themeRadios.length) {
  themeRadios.forEach((r) => {
    r.checked = r.value === savedThemePref;
    r.addEventListener("change", () => {
      if (r.checked) {
        localStorage.setItem("themePreference", r.value);
        applyTheme(r.value);
      }
    });
  });
}
|
||||
|
||||
// React to OS theme changes when in "system" mode
// Only the canvas stroke color needs a manual refresh; CSS media queries
// handle the rest of the page automatically.
const darkMq = window.matchMedia && window.matchMedia("(prefers-color-scheme: dark)");
const handleOsThemeChange = () => {
  const pref = localStorage.getItem("themePreference") || "system";
  if (pref === "system") updateWaveStroke();
};
if (darkMq && darkMq.addEventListener) {
  darkMq.addEventListener("change", handleOsThemeChange);
} else if (darkMq && darkMq.addListener) {
  // deprecated, but included for Safari compatibility
  darkMq.addListener(handleOsThemeChange);
}
|
||||
|
||||
/**
 * Request microphone permission (via a throwaway getUserMedia stream), then
 * enumerate all audio input devices and rebuild the microphone <select>.
 * Device labels are only exposed by the browser after permission is granted.
 *
 * BUGFIX: the previous version awaited
 * `navigator.permissions.query({ name: "microphone" })` into an unused
 * variable; that call throws a TypeError in browsers that do not recognize
 * the "microphone" permission name, which aborted enumeration before
 * getUserMedia ever ran. The query result was never used, so it is removed.
 */
async function enumerateMicrophones() {
  try {
    // Trigger the permission prompt; stop the tracks immediately — we only
    // need the grant so enumerateDevices() returns populated labels.
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    stream.getTracks().forEach((track) => track.stop());

    const devices = await navigator.mediaDevices.enumerateDevices();
    availableMicrophones = devices.filter((device) => device.kind === 'audioinput');

    populateMicrophoneSelect();
    console.log(`Found ${availableMicrophones.length} microphone(s)`);
  } catch (error) {
    console.error('Error enumerating microphones:', error);
    statusText.textContent = "Error accessing microphones. Please grant permission.";
  }
}
|
||||
|
||||
// Rebuild the microphone <select> from availableMicrophones and restore the
// persisted selection when that device is still plugged in.
function populateMicrophoneSelect() {
  if (!microphoneSelect) return;

  microphoneSelect.innerHTML = '<option value="">Default Microphone</option>';

  for (const [index, device] of availableMicrophones.entries()) {
    const option = document.createElement('option');
    option.value = device.deviceId;
    // Fall back to a numbered label when the browser withholds device names.
    option.textContent = device.label || `Microphone ${index + 1}`;
    microphoneSelect.appendChild(option);
  }

  const savedMicId = localStorage.getItem('selectedMicrophone');
  const savedStillPresent =
    savedMicId && availableMicrophones.some((mic) => mic.deviceId === savedMicId);
  if (savedStillPresent) {
    microphoneSelect.value = savedMicId;
    selectedMicrophoneId = savedMicId;
  }
}
|
||||
|
||||
// Persist the newly chosen microphone and, if a recording is live, restart
// capture so the new device takes effect.
function handleMicrophoneChange() {
  selectedMicrophoneId = microphoneSelect.value || null;
  localStorage.setItem('selectedMicrophone', selectedMicrophoneId || '');

  const activeDevice = availableMicrophones.find(
    (mic) => mic.deviceId === selectedMicrophoneId
  );
  const deviceName = activeDevice ? activeDevice.label : 'Default Microphone';

  console.log(`Selected microphone: ${deviceName}`);
  statusText.textContent = `Microphone changed to: ${deviceName}`;

  if (!isRecording) return;

  statusText.textContent = "Switching microphone... Please wait.";
  stopRecording().then(() => {
    // Small delay gives the server time to finalize the previous stream
    // before a fresh session begins.
    setTimeout(() => {
      toggleRecording();
    }, 1000);
  });
}
|
||||
|
||||
// Helpers
|
||||
// Format a numeric value with one decimal place; non-finite or non-numeric
// input is passed through unchanged.
function fmt1(x) {
  const asNumber = Number(x);
  if (!Number.isFinite(asNumber)) {
    return x;
  }
  return asNumber.toFixed(1);
}
|
||||
|
||||
// Default WebSocket URL.
// NOTE(review): the previous page-derived `host` / `port` / `protocol`
// constants were computed but never used anywhere in this file — inside a
// Chrome extension window.location is chrome-extension://, so the default
// deliberately stays the hard-coded local WhisperLiveKit endpoint. The dead
// locals have been removed.
const defaultWebSocketUrl = websocketUrl;

// Populate the default-URL caption and seed the input field.
if (websocketDefaultSpan) websocketDefaultSpan.textContent = defaultWebSocketUrl;
websocketInput.value = defaultWebSocketUrl;
websocketUrl = defaultWebSocketUrl;
||||
|
||||
// Optional chunk selector (guard for presence).
// The selected value is the MediaRecorder timeslice in milliseconds.
if (chunkSelector) {
  chunkSelector.addEventListener("change", () => {
    // BUGFIX: always pass the radix to parseInt.
    chunkDuration = parseInt(chunkSelector.value, 10);
  });
}
|
||||
|
||||
// WebSocket input change handling
// Accept the typed URL only when it carries an explicit ws:// or wss://
// scheme; otherwise show an error and keep the previous URL.
websocketInput.addEventListener("change", () => {
  const urlValue = websocketInput.value.trim();
  if (!urlValue.startsWith("ws://") && !urlValue.startsWith("wss://")) {
    statusText.textContent = "Invalid WebSocket URL (must start with ws:// or wss://)";
    return;
  }
  websocketUrl = urlValue;
  statusText.textContent = "WebSocket URL updated. Ready to connect.";
});
|
||||
|
||||
/**
 * Open a WebSocket to `websocketUrl` and wire up all event handlers.
 * Resolves once the connection is open; rejects on a malformed URL or a
 * connection error.
 * @returns {Promise<void>}
 */
function setupWebSocket() {
  return new Promise((resolve, reject) => {
    try {
      // The constructor throws synchronously on malformed URLs.
      websocket = new WebSocket(websocketUrl);
    } catch (error) {
      statusText.textContent = "Invalid WebSocket URL. Please check and try again.";
      reject(error);
      return;
    }

    websocket.onopen = () => {
      statusText.textContent = "Connected to server.";
      resolve();
    };

    websocket.onclose = () => {
      if (userClosing) {
        // We initiated the close (stop button). If we were still waiting for
        // the server's final result, render the last payload we received.
        if (waitingForStop) {
          statusText.textContent = "Processing finalized or connection closed.";
          if (lastReceivedData) {
            renderLinesWithBuffer(
              lastReceivedData.lines || [],
              lastReceivedData.buffer_diarization || "",
              lastReceivedData.buffer_transcription || "",
              0,
              0,
              true
            );
          }
        }
      } else {
        // Unexpected close (server down, network drop, model still loading).
        statusText.textContent = "Disconnected from the WebSocket server. (Check logs if model is loading.)";
        if (isRecording) {
          stopRecording();
        }
      }
      // Reset connection state regardless of who closed the socket.
      isRecording = false;
      waitingForStop = false;
      userClosing = false;
      lastReceivedData = null;
      websocket = null;
      updateUI();
    };

    websocket.onerror = () => {
      statusText.textContent = "Error connecting to WebSocket.";
      reject(new Error("Error connecting to WebSocket"));
    };

    websocket.onmessage = (event) => {
      const data = JSON.parse(event.data);

      // Server signals that final processing is done: render the last data,
      // re-enable the button, and close the socket.
      if (data.type === "ready_to_stop") {
        console.log("Ready to stop received, finalizing display and closing WebSocket.");
        waitingForStop = false;

        if (lastReceivedData) {
          renderLinesWithBuffer(
            lastReceivedData.lines || [],
            lastReceivedData.buffer_diarization || "",
            lastReceivedData.buffer_transcription || "",
            0,
            0,
            true
          );
        }
        statusText.textContent = "Finished processing audio! Ready to record again.";
        recordButton.disabled = false;

        if (websocket) {
          websocket.close();
        }
        return;
      }

      // Regular transcription update; keep a copy for the finalize render.
      lastReceivedData = data;

      const {
        lines = [],
        buffer_transcription = "",
        buffer_diarization = "",
        remaining_time_transcription = 0,
        remaining_time_diarization = 0,
        status = "active_transcription",
      } = data;

      renderLinesWithBuffer(
        lines,
        buffer_diarization,
        buffer_transcription,
        remaining_time_diarization,
        remaining_time_transcription,
        false,
        status
      );
    };
  });
}
|
||||
|
||||
/**
 * Render the transcript lines plus the in-flight transcription/diarization
 * buffers into #linesTranscript.
 *
 * Speaker conventions (per payloads seen in this file): -2 = silence,
 * 0 = diarization still pending, any other value = a speaker id.
 * When the content signature is unchanged from the previous call, only the
 * live lag counters are patched in place to avoid DOM churn.
 */
function renderLinesWithBuffer(
  lines,
  buffer_diarization,
  buffer_transcription,
  remaining_time_diarization,
  remaining_time_transcription,
  isFinalizing = false,
  current_status = "active_transcription"
) {
  if (current_status === "no_audio_detected") {
    linesTranscriptDiv.innerHTML =
      "<p style='text-align: center; color: var(--muted); margin-top: 20px;'><em>No audio detected...</em></p>";
    return;
  }

  // Flags that influence the rendered markup; folded into the signature so a
  // change in any of them forces a re-render.
  const showLoading = !isFinalizing && (lines || []).some((it) => it.speaker == 0);
  const showTransLag = !isFinalizing && remaining_time_transcription > 0;
  const showDiaLag = !isFinalizing && !!buffer_diarization && remaining_time_diarization > 0;
  // Cheap change detection: serialize the renderable state and compare with
  // the previous render.
  const signature = JSON.stringify({
    lines: (lines || []).map((it) => ({ speaker: it.speaker, text: it.text, beg: it.beg, end: it.end })),
    buffer_transcription: buffer_transcription || "",
    buffer_diarization: buffer_diarization || "",
    status: current_status,
    showLoading,
    showTransLag,
    showDiaLag,
    isFinalizing: !!isFinalizing,
  });
  if (lastSignature === signature) {
    // Same content — just refresh the live lag counters in place.
    const t = document.querySelector(".lag-transcription-value");
    if (t) t.textContent = fmt1(remaining_time_transcription);
    const d = document.querySelector(".lag-diarization-value");
    if (d) d.textContent = fmt1(remaining_time_diarization);
    const ld = document.querySelector(".loading-diarization-value");
    if (ld) ld.textContent = fmt1(remaining_time_diarization);
    return;
  }
  lastSignature = signature;

  const linesHtml = (lines || [])
    .map((item, idx) => {
      let timeInfo = "";
      if (item.beg !== undefined && item.end !== undefined) {
        timeInfo = ` ${item.beg} - ${item.end}`;
      }

      // Build the speaker badge for this line (silence / pending / speaker N).
      let speakerLabel = "";
      if (item.speaker === -2) {
        speakerLabel = `<span class="silence">Silence<span id='timeInfo'>${timeInfo}</span></span>`;
      } else if (item.speaker == 0 && !isFinalizing) {
        speakerLabel = `<span class='loading'><span class="spinner"></span><span id='timeInfo'><span class="loading-diarization-value">${fmt1(
          remaining_time_diarization
        )}</span> second(s) of audio are undergoing diarization</span></span>`;
      } else if (item.speaker !== 0) {
        speakerLabel = `<span id="speaker">Speaker ${item.speaker}<span id='timeInfo'>${timeInfo}</span></span>`;
      }

      let currentLineText = item.text || "";

      // Only the last line carries the lag badges and the pending buffers.
      if (idx === lines.length - 1) {
        if (!isFinalizing && item.speaker !== -2) {
          if (remaining_time_transcription > 0) {
            speakerLabel += `<span class="label_transcription"><span class="spinner"></span>Lag <span id='timeInfo'><span class="lag-transcription-value">${fmt1(
              remaining_time_transcription
            )}</span>s</span></span>`;
          }
          if (buffer_diarization && remaining_time_diarization > 0) {
            speakerLabel += `<span class="label_diarization"><span class="spinner"></span>Lag<span id='timeInfo'><span class="lag-diarization-value">${fmt1(
              remaining_time_diarization
            )}</span>s</span></span>`;
          }
        }

        if (buffer_diarization) {
          if (isFinalizing) {
            // On finalize, fold the buffer into the line as plain text.
            currentLineText +=
              (currentLineText.length > 0 && buffer_diarization.trim().length > 0 ? " " : "") + buffer_diarization.trim();
          } else {
            currentLineText += `<span class="buffer_diarization">${buffer_diarization}</span>`;
          }
        }
        if (buffer_transcription) {
          if (isFinalizing) {
            currentLineText +=
              (currentLineText.length > 0 && buffer_transcription.trim().length > 0 ? " " : "") +
              buffer_transcription.trim();
          } else {
            currentLineText += `<span class="buffer_transcription">${buffer_transcription}</span>`;
          }
        }
      }

      return currentLineText.trim().length > 0 || speakerLabel.length > 0
        ? `<p>${speakerLabel}<br/><div class='textcontent'>${currentLineText}</div></p>`
        : `<p>${speakerLabel}<br/></p>`;
    })
    .join("");

  linesTranscriptDiv.innerHTML = linesHtml;
  // Keep the newest line visible as the transcript grows.
  window.scrollTo({ top: document.body.scrollHeight, behavior: "smooth" });
}
|
||||
|
||||
// Refresh the MM:SS elapsed-time display; no-op when no recording is active.
function updateTimer() {
  if (!startTime) return;

  const elapsedSeconds = Math.floor((Date.now() - startTime) / 1000);
  const mm = String(Math.floor(elapsedSeconds / 60)).padStart(2, "0");
  const ss = String(elapsedSeconds % 60).padStart(2, "0");
  timerElement.textContent = `${mm}:${ss}`;
}
|
||||
|
||||
// Draw one frame of the time-domain waveform onto the small canvas and
// schedule the next frame. Coordinates are divided by devicePixelRatio
// because the canvas backing store was scaled up at init time.
function drawWaveform() {
  if (!analyser) return;

  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);
  analyser.getByteTimeDomainData(dataArray);

  waveCtx.clearRect(
    0,
    0,
    waveCanvas.width / (window.devicePixelRatio || 1),
    waveCanvas.height / (window.devicePixelRatio || 1)
  );
  waveCtx.lineWidth = 1;
  waveCtx.strokeStyle = waveStroke;
  waveCtx.beginPath();

  // Horizontal step so the whole sample buffer spans the canvas width.
  const sliceWidth = (waveCanvas.width / (window.devicePixelRatio || 1)) / bufferLength;
  let x = 0;

  for (let i = 0; i < bufferLength; i++) {
    // Samples are unsigned bytes centered on 128; v is ~1.0 at silence.
    const v = dataArray[i] / 128.0;
    const y = (v * (waveCanvas.height / (window.devicePixelRatio || 1))) / 2;

    if (i === 0) {
      waveCtx.moveTo(x, y);
    } else {
      waveCtx.lineTo(x, y);
    }

    x += sliceWidth;
  }

  // Close the trace at the vertical midline on the right edge.
  waveCtx.lineTo(
    waveCanvas.width / (window.devicePixelRatio || 1),
    (waveCanvas.height / (window.devicePixelRatio || 1)) / 2
  );
  waveCtx.stroke();

  animationFrame = requestAnimationFrame(drawWaveform);
}
|
||||
|
||||
/**
 * Start audio capture: tab audio via chrome.tabCapture when available,
 * otherwise the selected (or default) microphone. Streams webm chunks to the
 * WebSocket via MediaRecorder and starts the waveform + timer UI.
 */
async function startRecording() {
  try {
    // Best effort: keep the screen awake while recording.
    try {
      wakeLock = await navigator.wakeLock.request("screen");
    } catch (err) {
      console.log("Error acquiring wake lock.");
    }

    let stream;
    try {
      // Try tab capture first
      // (callback API wrapped in a Promise; a null stream means failure).
      stream = await new Promise((resolve, reject) => {
        chrome.tabCapture.capture({audio: true}, (s) => {
          if (s) {
            resolve(s);
          } else {
            reject(new Error('Tab capture failed or not available'));
          }
        });
      });
      statusText.textContent = "Using tab audio capture.";
    } catch (tabError) {
      console.log('Tab capture not available, falling back to microphone', tabError);
      // Fallback to microphone
      const audioConstraints = selectedMicrophoneId
        ? { audio: { deviceId: { exact: selectedMicrophoneId } } }
        : { audio: true };
      stream = await navigator.mediaDevices.getUserMedia(audioConstraints);
      statusText.textContent = "Using microphone audio.";
    }

    // Feed the stream into an analyser for the waveform visualization.
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    analyser = audioContext.createAnalyser();
    analyser.fftSize = 256;
    microphone = audioContext.createMediaStreamSource(stream);
    microphone.connect(analyser);

    // Ship audio to the server in chunkDuration-ms webm chunks.
    recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
    recorder.ondataavailable = (e) => {
      if (websocket && websocket.readyState === WebSocket.OPEN) {
        websocket.send(e.data);
      }
    };
    recorder.start(chunkDuration);

    startTime = Date.now();
    timerInterval = setInterval(updateTimer, 1000);
    drawWaveform();

    isRecording = true;
    updateUI();
  } catch (err) {
    // Special-case 0.0.0.0: browsers treat it as an insecure origin and
    // refuse media access there.
    if (window.location.hostname === "0.0.0.0") {
      statusText.textContent =
        "Error accessing audio input. Browsers may block audio access on 0.0.0.0. Try using localhost:8000 instead.";
    } else {
      statusText.textContent = "Error accessing audio input. Please check permissions.";
    }
    console.error(err);
  }
}
|
||||
|
||||
/**
 * Tear down the recording session: release the wake lock, signal the server
 * to finalize (empty blob), and dispose of recorder/audio-graph/UI resources.
 * The WebSocket itself is closed later, once "ready_to_stop" arrives.
 */
async function stopRecording() {
  if (wakeLock) {
    try {
      await wakeLock.release();
    } catch (e) {
      // ignore
    }
    wakeLock = null;
  }

  // Mark this as a user-initiated stop so onclose does not treat it as a drop.
  userClosing = true;
  waitingForStop = true;

  if (websocket && websocket.readyState === WebSocket.OPEN) {
    // An empty blob tells the server the audio stream is done.
    const emptyBlob = new Blob([], { type: "audio/webm" });
    websocket.send(emptyBlob);
    statusText.textContent = "Recording stopped. Processing final audio...";
  }

  if (recorder) {
    recorder.stop();
    recorder = null;
  }

  if (microphone) {
    microphone.disconnect();
    microphone = null;
  }

  if (analyser) {
    analyser = null;
  }

  if (audioContext && audioContext.state !== "closed") {
    try {
      await audioContext.close();
    } catch (e) {
      console.warn("Could not close audio context:", e);
    }
    audioContext = null;
  }

  if (animationFrame) {
    cancelAnimationFrame(animationFrame);
    animationFrame = null;
  }

  if (timerInterval) {
    clearInterval(timerInterval);
    timerInterval = null;
  }
  timerElement.textContent = "00:00";
  startTime = null;

  isRecording = false;
  updateUI();
}
|
||||
|
||||
/**
 * Toggle between starting and stopping a capture session.
 * On start, the WebSocket is connected first (if needed) so no audio chunks
 * are dropped; clicks are ignored while a previous session is finalizing.
 */
async function toggleRecording() {
  if (!isRecording) {
    // Ignore clicks until the server confirms the previous stop.
    if (waitingForStop) {
      console.log("Waiting for stop, early return");
      return;
    }
    console.log("Connecting to WebSocket");
    try {
      // Reuse an already-open socket; otherwise establish a new one first.
      if (!(websocket && websocket.readyState === WebSocket.OPEN)) {
        await setupWebSocket();
      }
      await startRecording();
    } catch (err) {
      statusText.textContent = "Could not connect to WebSocket or access mic. Aborted.";
      console.error(err);
    }
  } else {
    console.log("Stopping recording");
    // BUGFIX: stopRecording() is async — await it so teardown failures are
    // not left as an unhandled floating promise.
    await stopRecording();
  }
}
|
||||
|
||||
/**
 * Sync the record button and status text with the current
 * recording/finalizing state.
 */
function updateUI() {
  recordButton.classList.toggle("recording", isRecording);
  // The button is disabled only while we wait for the server to finalize.
  // (Previously .disabled was assigned twice — the trailing
  // `if (!waitingForStop) recordButton.disabled = false;` was a dead no-op
  // given this assignment, so it has been removed.)
  recordButton.disabled = waitingForStop;

  if (waitingForStop) {
    // Keep the more specific "Processing final audio..." message if present.
    if (statusText.textContent !== "Recording stopped. Processing final audio...") {
      statusText.textContent = "Please wait for processing to complete...";
    }
  } else if (isRecording) {
    statusText.textContent = "Recording...";
  } else {
    // Preserve the completion messages set by the WebSocket handlers.
    if (
      statusText.textContent !== "Finished processing audio! Ready to record again." &&
      statusText.textContent !== "Processing finalized or connection closed."
    ) {
      statusText.textContent = "Click to start transcription";
    }
  }
}
|
||||
|
||||
// Wire the UI controls to their handlers.
recordButton.addEventListener("click", toggleRecording);

if (microphoneSelect) {
  microphoneSelect.addEventListener("change", handleMicrophoneChange);
}
// NOTE(review): microphone enumeration on load and on devicechange is
// deliberately left disabled below — presumably to avoid triggering the
// permission prompt every time the popup opens. Confirm before re-enabling.
// document.addEventListener('DOMContentLoaded', async () => {
//   try {
//     await enumerateMicrophones();
//   } catch (error) {
//     console.log("Could not enumerate microphones on load:", error);
//   }
// });
// navigator.mediaDevices.addEventListener('devicechange', async () => {
//   console.log('Device change detected, re-enumerating microphones');
//   try {
//     await enumerateMicrophones();
//   } catch (error) {
//     console.log("Error re-enumerating microphones:", error);
//   }
// });
|
||||
1
chrome-extension/web/src/dark_mode.svg
Normal file
1
chrome-extension/web/src/dark_mode.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#5f6368"><path d="M480-120q-151 0-255.5-104.5T120-480q0-138 90-239.5T440-838q13-2 23 3.5t16 14.5q6 9 6.5 21t-7.5 23q-17 26-25.5 55t-8.5 61q0 90 63 153t153 63q31 0 61.5-9t54.5-25q11-7 22.5-6.5T819-479q10 5 15.5 15t3.5 24q-14 138-117.5 229T480-120Zm0-80q88 0 158-48.5T740-375q-20 5-40 8t-40 3q-123 0-209.5-86.5T364-660q0-20 3-40t8-40q-78 32-126.5 102T200-480q0 116 82 198t198 82Zm-10-270Z"/></svg>
|
||||
|
After Width: | Height: | Size: 493 B |
1
chrome-extension/web/src/light_mode.svg
Normal file
1
chrome-extension/web/src/light_mode.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#5f6368"><path d="M480-360q50 0 85-35t35-85q0-50-35-85t-85-35q-50 0-85 35t-35 85q0 50 35 85t85 35Zm0 80q-83 0-141.5-58.5T280-480q0-83 58.5-141.5T480-680q83 0 141.5 58.5T680-480q0 83-58.5 141.5T480-280ZM80-440q-17 0-28.5-11.5T40-480q0-17 11.5-28.5T80-520h80q17 0 28.5 11.5T200-480q0 17-11.5 28.5T160-440H80Zm720 0q-17 0-28.5-11.5T760-480q0-17 11.5-28.5T800-520h80q17 0 28.5 11.5T920-480q0 17-11.5 28.5T880-440h-80ZM480-760q-17 0-28.5-11.5T440-800v-80q0-17 11.5-28.5T480-920q17 0 28.5 11.5T520-880v80q0 17-11.5 28.5T480-760Zm0 720q-17 0-28.5-11.5T440-80v-80q0-17 11.5-28.5T480-200q17 0 28.5 11.5T520-160v80q0 17-11.5 28.5T480-40ZM226-678l-43-42q-12-11-11.5-28t11.5-29q12-12 29-12t28 12l42 43q11 12 11 28t-11 28q-11 12-27.5 11.5T226-678Zm494 495-42-43q-11-12-11-28.5t11-27.5q11-12 27.5-11.5T734-282l43 42q12 11 11.5 28T777-183q-12 12-29 12t-28-12Zm-42-495q-12-11-11.5-27.5T678-734l42-43q11-12 28-11.5t29 11.5q12 12 12 29t-12 28l-43 42q-12 11-28 11t-28-11ZM183-183q-12-12-12-29t12-28l43-42q12-11 28.5-11t27.5 11q12 11 11.5 27.5T282-226l-42 43q-11 12-28 11.5T183-183Zm297-297Z"/></svg>
|
||||
|
After Width: | Height: | Size: 1.2 KiB |
1
chrome-extension/web/src/system_mode.svg
Normal file
1
chrome-extension/web/src/system_mode.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#5f6368"><path d="M396-396q-32-32-58.5-67T289-537q-5 14-6.5 28.5T281-480q0 83 58 141t141 58q14 0 28.5-2t28.5-6q-39-22-74-48.5T396-396Zm85 196q-56 0-107-21t-91-61q-40-40-61-91t-21-107q0-51 17-97.5t50-84.5q13-14 32-9.5t27 24.5q21 55 52.5 104t73.5 91q42 42 91 73.5T648-326q20 8 24.5 27t-9.5 32q-38 33-84.5 50T481-200Zm223-192q-16-5-23-20.5t-4-32.5q9-48-6-94.5T621-621q-35-35-80.5-49.5T448-677q-17 3-32-4t-21-23q-6-16 1.5-31t23.5-19q69-15 138 4.5T679-678q51 51 71 120t5 138q-4 17-19 25t-32 3ZM480-840q-17 0-28.5-11.5T440-880v-40q0-17 11.5-28.5T480-960q17 0 28.5 11.5T520-920v40q0 17-11.5 28.5T480-840Zm0 840q-17 0-28.5-11.5T440-40v-40q0-17 11.5-28.5T480-120q17 0 28.5 11.5T520-80v40q0 17-11.5 28.5T480 0Zm255-734q-12-12-12-28.5t12-28.5l28-28q11-11 27.5-11t28.5 11q12 12 12 28.5T819-762l-28 28q-12 12-28 12t-28-12ZM141-141q-12-12-12-28.5t12-28.5l28-28q12-12 28-12t28 12q12 12 12 28.5T225-169l-28 28q-11 11-27.5 11T141-141Zm739-299q-17 0-28.5-11.5T840-480q0-17 11.5-28.5T880-520h40q17 0 28.5 11.5T960-480q0 17-11.5 28.5T920-440h-40Zm-840 0q-17 0-28.5-11.5T0-480q0-17 11.5-28.5T40-520h40q17 0 28.5 11.5T120-480q0 17-11.5 28.5T80-440H40Zm779 299q-12 12-28.5 12T762-141l-28-28q-12-12-12-28t12-28q12-12 28.5-12t28.5 12l28 28q11 11 11 27.5T819-141ZM226-735q-12 12-28.5 12T169-735l-28-28q-11-11-11-27.5t11-28.5q12-12 28.5-12t28.5 12l28 28q12 12 12 28t-12 28Zm170 339Z"/></svg>
|
||||
|
After Width: | Height: | Size: 1.4 KiB |
@@ -399,17 +399,17 @@ class PaddedAlignAttWhisper:
|
||||
mlx_mel_padded = mlx_log_mel_spectrogram(audio=input_segments.detach(), n_mels=self.model.dims.n_mels, padding=N_SAMPLES)
|
||||
mlx_mel = mlx_pad_or_trim(mlx_mel_padded, N_FRAMES, axis=-2)
|
||||
mlx_encoder_feature = self.mlx_encoder.encoder(mlx_mel[None])
|
||||
encoder_feature = torch.as_tensor(mlx_encoder_feature)
|
||||
encoder_feature = torch.tensor(np.array(mlx_encoder_feature))
|
||||
content_mel_len = int((mlx_mel_padded.shape[0] - mlx_mel.shape[0])/2)
|
||||
device = encoder_feature.device #'cpu' is apple silicon
|
||||
device = 'cpu'
|
||||
elif self.fw_encoder:
|
||||
audio_length_seconds = len(input_segments) / 16000
|
||||
content_mel_len = int(audio_length_seconds * 100)//2
|
||||
mel_padded_2 = self.fw_feature_extractor(waveform=input_segments.numpy(), padding=N_SAMPLES)[None, :]
|
||||
mel = fw_pad_or_trim(mel_padded_2, N_FRAMES, axis=-1)
|
||||
encoder_feature_ctranslate = self.fw_encoder.encode(mel)
|
||||
encoder_feature = torch.as_tensor(encoder_feature_ctranslate)
|
||||
device = encoder_feature.device
|
||||
encoder_feature = torch.Tensor(np.array(encoder_feature_ctranslate))
|
||||
device = 'cpu'
|
||||
else:
|
||||
# mel + padding to 30s
|
||||
mel_padded = log_mel_spectrogram(input_segments, n_mels=self.model.dims.n_mels, padding=N_SAMPLES,
|
||||
|
||||
Reference in New Issue
Block a user