mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-08 06:44:09 +00:00
Compare commits
7 Commits
0.2.17
...
0.2.17.pos
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6ae545bcb1 | ||
|
|
04980d3f5e | ||
|
|
79a705c969 | ||
|
|
34e4abd455 | ||
|
|
d59ddbaeae | ||
|
|
4dd66e7766 | ||
|
|
3db5d81a20 |
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "whisperlivekit"
|
||||
version = "0.2.17"
|
||||
version = "0.2.17.post1"
|
||||
description = "Real-time speech-to-text with speaker diarization using Whisper"
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
@@ -57,6 +57,7 @@ packages = [
|
||||
"whisperlivekit",
|
||||
"whisperlivekit.diarization",
|
||||
"whisperlivekit.simul_whisper",
|
||||
"whisperlivekit.simul_whisper.mlx",
|
||||
"whisperlivekit.whisper",
|
||||
"whisperlivekit.whisper.assets",
|
||||
"whisperlivekit.whisper.normalizers",
|
||||
|
||||
@@ -32,7 +32,7 @@ async def get_all_from_queue(queue: asyncio.Queue) -> Union[object, Silence, np.
|
||||
if isinstance(first_item, Silence):
|
||||
return first_item
|
||||
items.append(first_item)
|
||||
|
||||
|
||||
while True:
|
||||
if not queue._queue:
|
||||
break
|
||||
@@ -53,15 +53,15 @@ class AudioProcessor:
|
||||
Processes audio streams for transcription and diarization.
|
||||
Handles audio processing, state management, and result formatting.
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, **kwargs: Any) -> None:
|
||||
"""Initialize the audio processor with configuration, models, and state."""
|
||||
|
||||
|
||||
if 'transcription_engine' in kwargs and isinstance(kwargs['transcription_engine'], TranscriptionEngine):
|
||||
models = kwargs['transcription_engine']
|
||||
else:
|
||||
models = TranscriptionEngine(**kwargs)
|
||||
|
||||
|
||||
# Audio processing settings
|
||||
self.args = models.args
|
||||
self.sample_rate = 16000
|
||||
@@ -86,13 +86,13 @@ class AudioProcessor:
|
||||
# Models and processing
|
||||
self.asr: Any = models.asr
|
||||
self.vac: Optional[FixedVADIterator] = None
|
||||
|
||||
|
||||
if self.args.vac:
|
||||
if models.vac_session is not None:
|
||||
vac_model = OnnxWrapper(session=models.vac_session)
|
||||
self.vac = FixedVADIterator(vac_model)
|
||||
else:
|
||||
self.vac = FixedVADIterator(load_jit_vad())
|
||||
self.vac = FixedVADIterator(load_jit_vad())
|
||||
self.ffmpeg_manager: Optional[FFmpegManager] = None
|
||||
self.ffmpeg_reader_task: Optional[asyncio.Task] = None
|
||||
self._ffmpeg_error: Optional[str] = None
|
||||
@@ -106,7 +106,7 @@ class AudioProcessor:
|
||||
logger.error(f"FFmpeg error: {error_type}")
|
||||
self._ffmpeg_error = error_type
|
||||
self.ffmpeg_manager.on_error_callback = handle_ffmpeg_error
|
||||
|
||||
|
||||
self.transcription_queue: Optional[asyncio.Queue] = asyncio.Queue() if self.args.transcription else None
|
||||
self.diarization_queue: Optional[asyncio.Queue] = asyncio.Queue() if self.args.diarization else None
|
||||
self.translation_queue: Optional[asyncio.Queue] = asyncio.Queue() if self.args.target_language else None
|
||||
@@ -117,14 +117,14 @@ class AudioProcessor:
|
||||
self.translation_task: Optional[asyncio.Task] = None
|
||||
self.watchdog_task: Optional[asyncio.Task] = None
|
||||
self.all_tasks_for_cleanup: List[asyncio.Task] = []
|
||||
|
||||
|
||||
self.transcription: Optional[Any] = None
|
||||
self.translation: Optional[Any] = None
|
||||
self.diarization: Optional[Any] = None
|
||||
|
||||
if self.args.transcription:
|
||||
self.transcription = online_factory(self.args, models.asr)
|
||||
self.sep = self.transcription.asr.sep
|
||||
self.transcription = online_factory(self.args, models.asr)
|
||||
self.sep = self.transcription.asr.sep
|
||||
if self.args.diarization:
|
||||
self.diarization = online_diarization_factory(self.args, models.diarization_model)
|
||||
if models.translation_model:
|
||||
@@ -182,24 +182,24 @@ class AudioProcessor:
|
||||
def convert_pcm_to_float(self, pcm_buffer: Union[bytes, bytearray]) -> np.ndarray:
|
||||
"""Convert PCM buffer in s16le format to normalized NumPy array."""
|
||||
return np.frombuffer(pcm_buffer, dtype=np.int16).astype(np.float32) / 32768.0
|
||||
|
||||
|
||||
async def get_current_state(self) -> State:
|
||||
"""Get current state."""
|
||||
async with self.lock:
|
||||
current_time = time()
|
||||
|
||||
|
||||
remaining_transcription = 0
|
||||
if self.state.end_buffer > 0:
|
||||
remaining_transcription = max(0, round(current_time - self.beg_loop - self.state.end_buffer, 1))
|
||||
|
||||
|
||||
remaining_diarization = 0
|
||||
if self.state.tokens:
|
||||
latest_end = max(self.state.end_buffer, self.state.tokens[-1].end if self.state.tokens else 0)
|
||||
remaining_diarization = max(0, round(latest_end - self.state.end_attributed_speaker, 1))
|
||||
|
||||
|
||||
self.state.remaining_time_transcription = remaining_transcription
|
||||
self.state.remaining_time_diarization = remaining_diarization
|
||||
|
||||
|
||||
return self.state
|
||||
|
||||
async def ffmpeg_stdout_reader(self) -> None:
|
||||
@@ -255,7 +255,7 @@ class AudioProcessor:
|
||||
async def transcription_processor(self) -> None:
|
||||
"""Process audio chunks for transcription."""
|
||||
cumulative_pcm_duration_stream_time = 0.0
|
||||
|
||||
|
||||
while True:
|
||||
try:
|
||||
# item = await self.transcription_queue.get()
|
||||
@@ -311,12 +311,12 @@ class AudioProcessor:
|
||||
|
||||
if new_tokens:
|
||||
candidate_end_times.append(new_tokens[-1].end)
|
||||
|
||||
|
||||
if _buffer_transcript.end is not None:
|
||||
candidate_end_times.append(_buffer_transcript.end)
|
||||
|
||||
|
||||
candidate_end_times.append(current_audio_processed_upto)
|
||||
|
||||
|
||||
async with self.lock:
|
||||
self.state.tokens.extend(new_tokens)
|
||||
self.state.buffer_transcription = _buffer_transcript
|
||||
@@ -326,13 +326,13 @@ class AudioProcessor:
|
||||
|
||||
if self.translation_queue:
|
||||
for token in new_tokens:
|
||||
await self.translation_queue.put(token)
|
||||
await self.translation_queue.put(token)
|
||||
except Exception as e:
|
||||
logger.warning(f"Exception in transcription_processor: {e}")
|
||||
logger.warning(f"Traceback: {traceback.format_exc()}")
|
||||
if 'pcm_array' in locals() and pcm_array is not SENTINEL : # Check if pcm_array was assigned from queue
|
||||
self.transcription_queue.task_done()
|
||||
|
||||
|
||||
if self.is_stopping:
|
||||
logger.info("Transcription processor finishing due to stopping flag.")
|
||||
if self.diarization_queue:
|
||||
@@ -353,18 +353,21 @@ class AudioProcessor:
|
||||
if item.has_ended:
|
||||
self.diarization.insert_silence(item.duration)
|
||||
continue
|
||||
|
||||
self.diarization.insert_audio_chunk(item)
|
||||
diarization_segments = await self.diarization.diarize()
|
||||
self.state.new_diarization = diarization_segments
|
||||
|
||||
diar_end = 0.0
|
||||
if diarization_segments:
|
||||
diar_end = max(getattr(s, "end", 0.0) for s in diarization_segments)
|
||||
async with self.lock:
|
||||
self.state.new_diarization = diarization_segments
|
||||
self.state.end_attributed_speaker = max(self.state.end_attributed_speaker, diar_end)
|
||||
except Exception as e:
|
||||
logger.warning(f"Exception in diarization_processor: {e}")
|
||||
logger.warning(f"Traceback: {traceback.format_exc()}")
|
||||
logger.info("Diarization processor task finished.")
|
||||
|
||||
async def translation_processor(self) -> None:
|
||||
# the idea is to ignore diarization for the moment. We use only transcription tokens.
|
||||
# the idea is to ignore diarization for the moment. We use only transcription tokens.
|
||||
# And the speaker is attributed given the segments used for the translation
|
||||
# in the future we want to have different languages for each speaker etc, so it will be more complex.
|
||||
while True:
|
||||
@@ -426,22 +429,22 @@ class AudioProcessor:
|
||||
remaining_time_transcription=state.remaining_time_transcription,
|
||||
remaining_time_diarization=state.remaining_time_diarization if self.args.diarization else 0
|
||||
)
|
||||
|
||||
|
||||
should_push = (response != self.last_response_content)
|
||||
if should_push:
|
||||
yield response
|
||||
self.last_response_content = response
|
||||
|
||||
|
||||
if self.is_stopping and self._processing_tasks_done():
|
||||
logger.info("Results formatter: All upstream processors are done and in stopping state. Terminating.")
|
||||
return
|
||||
|
||||
|
||||
await asyncio.sleep(0.05)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Exception in results_formatter. Traceback: {traceback.format_exc()}")
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
|
||||
async def create_tasks(self) -> AsyncGenerator[FrontData, None]:
|
||||
"""Create and start processing tasks."""
|
||||
self.all_tasks_for_cleanup = []
|
||||
@@ -466,21 +469,21 @@ class AudioProcessor:
|
||||
self.transcription_task = asyncio.create_task(self.transcription_processor())
|
||||
self.all_tasks_for_cleanup.append(self.transcription_task)
|
||||
processing_tasks_for_watchdog.append(self.transcription_task)
|
||||
|
||||
|
||||
if self.diarization:
|
||||
self.diarization_task = asyncio.create_task(self.diarization_processor())
|
||||
self.all_tasks_for_cleanup.append(self.diarization_task)
|
||||
processing_tasks_for_watchdog.append(self.diarization_task)
|
||||
|
||||
|
||||
if self.translation:
|
||||
self.translation_task = asyncio.create_task(self.translation_processor())
|
||||
self.all_tasks_for_cleanup.append(self.translation_task)
|
||||
processing_tasks_for_watchdog.append(self.translation_task)
|
||||
|
||||
|
||||
# Monitor overall system health
|
||||
self.watchdog_task = asyncio.create_task(self.watchdog(processing_tasks_for_watchdog))
|
||||
self.all_tasks_for_cleanup.append(self.watchdog_task)
|
||||
|
||||
|
||||
return self.results_formatter()
|
||||
|
||||
async def watchdog(self, tasks_to_monitor: List[asyncio.Task]) -> None:
|
||||
@@ -493,7 +496,7 @@ class AudioProcessor:
|
||||
return
|
||||
|
||||
await asyncio.sleep(10)
|
||||
|
||||
|
||||
for i, task in enumerate(list(tasks_remaining)):
|
||||
if task.done():
|
||||
exc = task.exception()
|
||||
@@ -503,13 +506,13 @@ class AudioProcessor:
|
||||
else:
|
||||
logger.info(f"{task_name} completed normally.")
|
||||
tasks_remaining.remove(task)
|
||||
|
||||
|
||||
except asyncio.CancelledError:
|
||||
logger.info("Watchdog task cancelled.")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Error in watchdog task: {e}", exc_info=True)
|
||||
|
||||
|
||||
async def cleanup(self) -> None:
|
||||
"""Clean up resources when processing is complete."""
|
||||
logger.info("Starting cleanup of AudioProcessor resources.")
|
||||
@@ -517,7 +520,7 @@ class AudioProcessor:
|
||||
for task in self.all_tasks_for_cleanup:
|
||||
if task and not task.done():
|
||||
task.cancel()
|
||||
|
||||
|
||||
created_tasks = [t for t in self.all_tasks_for_cleanup if t]
|
||||
if created_tasks:
|
||||
await asyncio.gather(*created_tasks, return_exceptions=True)
|
||||
@@ -555,7 +558,7 @@ class AudioProcessor:
|
||||
if not message:
|
||||
logger.info("Empty audio message received, initiating stop sequence.")
|
||||
self.is_stopping = True
|
||||
|
||||
|
||||
if self.transcription_queue:
|
||||
await self.transcription_queue.put(SENTINEL)
|
||||
|
||||
@@ -596,7 +599,7 @@ class AudioProcessor:
|
||||
|
||||
chunk_size = min(len(self.pcm_buffer), self.max_bytes_per_sec)
|
||||
aligned_chunk_size = (chunk_size // self.bytes_per_sample) * self.bytes_per_sample
|
||||
|
||||
|
||||
if aligned_chunk_size == 0:
|
||||
return
|
||||
pcm_array = self.convert_pcm_to_float(self.pcm_buffer[:aligned_chunk_size])
|
||||
@@ -613,7 +616,7 @@ class AudioProcessor:
|
||||
if res is not None:
|
||||
if "start" in res and self.current_silence:
|
||||
await self._end_silence()
|
||||
|
||||
|
||||
if "end" in res and not self.current_silence:
|
||||
pre_silence_chunk = self._slice_before_silence(
|
||||
pcm_array, chunk_sample_start, res.get("end")
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
import sys
|
||||
import threading
|
||||
from argparse import Namespace
|
||||
|
||||
from whisperlivekit.local_agreement.online_asr import OnlineASRProcessor
|
||||
@@ -19,16 +20,26 @@ logger = logging.getLogger(__name__)
|
||||
class TranscriptionEngine:
|
||||
_instance = None
|
||||
_initialized = False
|
||||
_lock = threading.Lock() # Thread-safe singleton lock
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
# Double-checked locking pattern for thread-safe singleton
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
with cls._lock:
|
||||
# Check again inside lock to prevent race condition
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
return cls._instance
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if TranscriptionEngine._initialized:
|
||||
return
|
||||
# Thread-safe initialization check
|
||||
with TranscriptionEngine._lock:
|
||||
if TranscriptionEngine._initialized:
|
||||
return
|
||||
# Set flag immediately to prevent re-initialization
|
||||
TranscriptionEngine._initialized = True
|
||||
|
||||
# Perform initialization outside lock to avoid holding lock during slow operations
|
||||
global_params = {
|
||||
"host": "localhost",
|
||||
"port": 8000,
|
||||
@@ -172,7 +183,6 @@ class TranscriptionEngine:
|
||||
}
|
||||
translation_params = update_with_kwargs(translation_params, kwargs)
|
||||
self.translation_model = load_model([self.args.lan], **translation_params) #in the future we want to handle different languages for different speakers
|
||||
TranscriptionEngine._initialized = True
|
||||
|
||||
|
||||
def online_factory(args, asr):
|
||||
|
||||
@@ -47,9 +47,24 @@ class DecoderState:
|
||||
|
||||
def clean_cache(self):
|
||||
"""Clean the kv_cache after each inference step."""
|
||||
self.kv_cache = {}
|
||||
# Explicitly delete tensor references to free GPU memory
|
||||
if self.kv_cache:
|
||||
for key in list(self.kv_cache.keys()):
|
||||
tensor = self.kv_cache.pop(key, None)
|
||||
if tensor is not None:
|
||||
del tensor
|
||||
|
||||
# Clear the dict
|
||||
self.kv_cache.clear()
|
||||
|
||||
# Force GPU cache cleanup (only if CUDA is available)
|
||||
import torch
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
if self.decoder_type == "beam" and self.inference is not None:
|
||||
self.inference.kv_cache = self.kv_cache
|
||||
# Create NEW dict instead of sharing reference
|
||||
self.inference.kv_cache = {}
|
||||
if self.token_decoder is not None:
|
||||
self.token_decoder.reset()
|
||||
|
||||
|
||||
@@ -626,8 +626,10 @@ class AlignAtt:
|
||||
|
||||
try:
|
||||
current_timestamp = l_absolute_timestamps[timestamp_idx]
|
||||
except:
|
||||
pass
|
||||
except IndexError:
|
||||
# Use last timestamp if index out of range
|
||||
logger.warning(f"Timestamp index {timestamp_idx} out of range, using last timestamp")
|
||||
current_timestamp = l_absolute_timestamps[-1] if l_absolute_timestamps else 0.0
|
||||
timestamp_idx += len(word_tokens)
|
||||
|
||||
timestamp_entry = ASRToken(
|
||||
|
||||
139
whisperlivekit/thread_safety.py
Normal file
139
whisperlivekit/thread_safety.py
Normal file
@@ -0,0 +1,139 @@
|
||||
"""
|
||||
Thread Safety Configuration for WhisperLiveKit
|
||||
|
||||
This module provides thread safety configuration and utilities.
|
||||
|
||||
Environment Variables:
|
||||
WHISPERLIVEKIT_MODEL_LOCK: Enable/disable model locking (default: 1)
|
||||
Set to "0" to disable for single-connection deployments
|
||||
|
||||
WHISPERLIVEKIT_LOCK_TIMEOUT: Lock acquisition timeout in seconds (default: 30)
|
||||
|
||||
Usage:
|
||||
# Enable model locking (default)
|
||||
export WHISPERLIVEKIT_MODEL_LOCK=1
|
||||
|
||||
# Disable for single-connection deployment
|
||||
export WHISPERLIVEKIT_MODEL_LOCK=0
|
||||
|
||||
# Custom timeout
|
||||
export WHISPERLIVEKIT_LOCK_TIMEOUT=60
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import threading
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Configuration
|
||||
USE_MODEL_LOCK = os.environ.get("WHISPERLIVEKIT_MODEL_LOCK", "1") == "1"
|
||||
LOCK_TIMEOUT = float(os.environ.get("WHISPERLIVEKIT_LOCK_TIMEOUT", "30.0"))
|
||||
|
||||
# Global model lock
|
||||
_model_lock = threading.Lock()
|
||||
|
||||
# Log configuration on import
|
||||
if USE_MODEL_LOCK:
|
||||
logger.info(f"Model locking ENABLED (timeout: {LOCK_TIMEOUT}s)")
|
||||
logger.info("For single-connection deployments, set WHISPERLIVEKIT_MODEL_LOCK=0")
|
||||
else:
|
||||
logger.warning("Model locking DISABLED - only safe for single-connection deployments")
|
||||
|
||||
|
||||
def get_model_lock():
|
||||
"""Get the global model lock instance"""
|
||||
return _model_lock
|
||||
|
||||
|
||||
def acquire_model_lock(timeout=None):
|
||||
"""
|
||||
Acquire model lock with timeout.
|
||||
|
||||
Args:
|
||||
timeout: Lock acquisition timeout (default: use LOCK_TIMEOUT)
|
||||
|
||||
Returns:
|
||||
bool: True if lock acquired, False on timeout
|
||||
"""
|
||||
if not USE_MODEL_LOCK:
|
||||
return True
|
||||
|
||||
timeout = timeout or LOCK_TIMEOUT
|
||||
acquired = _model_lock.acquire(timeout=timeout)
|
||||
|
||||
if not acquired:
|
||||
logger.error(f"Failed to acquire model lock within {timeout}s")
|
||||
|
||||
return acquired
|
||||
|
||||
|
||||
def release_model_lock():
|
||||
"""Release model lock"""
|
||||
if not USE_MODEL_LOCK:
|
||||
return
|
||||
|
||||
try:
|
||||
_model_lock.release()
|
||||
except RuntimeError:
|
||||
# Lock not held - this is fine
|
||||
pass
|
||||
|
||||
|
||||
class ModelLockContext:
|
||||
"""Context manager for model lock"""
|
||||
|
||||
def __init__(self, timeout=None):
|
||||
self.timeout = timeout
|
||||
self.acquired = False
|
||||
|
||||
def __enter__(self):
|
||||
self.acquired = acquire_model_lock(self.timeout)
|
||||
return self.acquired
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
if self.acquired:
|
||||
release_model_lock()
|
||||
return False
|
||||
|
||||
|
||||
# Concurrency recommendations
|
||||
RECOMMENDED_CONNECTIONS_PER_WORKER = 1 if USE_MODEL_LOCK else 1
|
||||
RECOMMENDED_WORKERS = 4
|
||||
|
||||
def print_deployment_recommendations():
|
||||
"""Print recommended deployment configuration"""
|
||||
print("\n" + "="*60)
|
||||
print("WhisperLiveKit Deployment Recommendations")
|
||||
print("="*60)
|
||||
|
||||
if USE_MODEL_LOCK:
|
||||
print("⚠️ Model locking is ENABLED")
|
||||
print(" This serializes inference across connections.")
|
||||
print()
|
||||
print("Recommended deployment:")
|
||||
print(f" gunicorn -w {RECOMMENDED_WORKERS} \\")
|
||||
print(" -k uvicorn.workers.UvicornWorker \\")
|
||||
print(" --worker-connections 1 \\")
|
||||
print(" whisperlivekit.basic_server:app")
|
||||
print()
|
||||
print("Expected capacity:")
|
||||
print(f" - {RECOMMENDED_WORKERS} concurrent users (1 per worker)")
|
||||
print(f" - Memory: ~{RECOMMENDED_WORKERS}x model size")
|
||||
else:
|
||||
print("✅ Model locking is DISABLED")
|
||||
print(" ⚠️ ONLY safe for single-connection deployments")
|
||||
print()
|
||||
print("Recommended deployment:")
|
||||
print(" uvicorn whisperlivekit.basic_server:app \\")
|
||||
print(" --host 0.0.0.0 --port 8000 \\")
|
||||
print(" --workers 1")
|
||||
print()
|
||||
print("Expected capacity:")
|
||||
print(" - 1 concurrent user only")
|
||||
|
||||
print("="*60 + "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print_deployment_recommendations()
|
||||
Reference in New Issue
Block a user