From b23ef3ec3eb0b95808bb2a5714a2df984f8ac956 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Wed, 28 May 2025 11:42:26 +0200 Subject: [PATCH 1/5] refactor license for correct shields.io detection --- LICENSE | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/LICENSE b/LICENSE index 723d1de..9d4bfb1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,10 +1,6 @@ MIT License Copyright (c) 2025 Quentin Fuxa. -Based on: -- The original work by ÚFAL. License: https://github.com/ufal/whisper_streaming/blob/main/LICENSE -- The work by Snakers4 (silero-vad). License: https://github.com/snakers4/silero-vad/blob/f6b1294cb27590fb2452899df98fb234dfef1134/LICENSE -- The work in Diart by juanmc2005. License: https://github.com/juanmc2005/diart/blob/main/LICENSE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -26,8 +22,7 @@ SOFTWARE. --- -Third-party components included in this software: - -- **whisper_streaming** by ÚFAL – MIT License – https://github.com/ufal/whisper_streaming -- **silero-vad** by Snakers4 – MIT License – https://github.com/snakers4/silero-vad -- **Diart** by juanmc2005 – MIT License – https://github.com/juanmc2005/diart +Based on: +- **whisper_streaming** by ÚFAL – MIT License – https://github.com/ufal/whisper_streaming. The original work by ÚFAL. License: https://github.com/ufal/whisper_streaming/blob/main/LICENSE +- **silero-vad** by Snakers4 – MIT License – https://github.com/snakers4/silero-vad. The work by Snakers4 (silero-vad). License: https://github.com/snakers4/silero-vad/blob/f6b1294cb27590fb2452899df98fb234dfef1134/LICENSE +- **Diart** by juanmc2005 – MIT License – https://github.com/juanmc2005/diart. The work in Diart by juanmc2005. License: https://github.com/juanmc2005/diart/blob/main/LICENSE \ No newline at end of file From 46770efd6c445c1cbf00aba62fc4f2bffbc4b0bf Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Wed, 28 May 2025 11:43:18 +0200 Subject: [PATCH 2/5] correct error when using VAC --- .../whisper_streaming_custom/online_asr.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/whisperlivekit/whisper_streaming_custom/online_asr.py b/whisperlivekit/whisper_streaming_custom/online_asr.py index 1af06de..8432662 100644 --- a/whisperlivekit/whisper_streaming_custom/online_asr.py +++ b/whisperlivekit/whisper_streaming_custom/online_asr.py @@ -343,15 +343,15 @@ class OnlineASRProcessor: ) sentences.append(sentence) return sentences - def finish(self) -> Transcript: + + def finish(self) -> List[ASRToken]: """ Flush the remaining transcript when processing ends. """ remaining_tokens = self.transcript_buffer.buffer - final_transcript = self.concatenate_tokens(remaining_tokens) - logger.debug(f"Final non-committed transcript: {final_transcript}") + logger.debug(f"Final non-committed tokens: {remaining_tokens}") self.buffer_time_offset += len(self.audio_buffer) / self.SAMPLING_RATE - return final_transcript + return remaining_tokens def concatenate_tokens( self, @@ -384,7 +384,8 @@ class VACOnlineASRProcessor: def __init__(self, online_chunk_size: float, *args, **kwargs): self.online_chunk_size = online_chunk_size self.online = OnlineASRProcessor(*args, **kwargs) - + self.asr = self.online.asr + # Load a VAD model (e.g. Silero VAD) import torch model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad") @@ -455,7 +456,7 @@ class VACOnlineASRProcessor: self.buffer_offset += max(0, len(self.audio_buffer) - self.SAMPLING_RATE) self.audio_buffer = self.audio_buffer[-self.SAMPLING_RATE:] - def process_iter(self) -> Transcript: + def process_iter(self) -> List[ASRToken]: """ Depending on the VAD status and the amount of accumulated audio, process the current audio chunk. @@ -467,9 +468,9 @@ class VACOnlineASRProcessor: return self.online.process_iter() else: logger.debug("No online update, only VAD") - return Transcript(None, None, "") + return [] - def finish(self) -> Transcript: + def finish(self) -> List[ASRToken]: """Finish processing by flushing any remaining text.""" result = self.online.finish() self.current_online_chunk_buffer_size = 0 @@ -480,4 +481,4 @@ class VACOnlineASRProcessor: """ Get the unvalidated buffer in string format. """ - return self.online.concatenate_tokens(self.online.transcript_buffer.buffer).text + return self.online.concatenate_tokens(self.online.transcript_buffer.buffer) From 6797b881761acea946b0e57c498cdac8abcbc096 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Wed, 28 May 2025 11:43:30 +0200 Subject: [PATCH 3/5] Error handling for missing FFmpeg in start_ffmpeg_decoder --- whisperlivekit/audio_processor.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/whisperlivekit/audio_processor.py b/whisperlivekit/audio_processor.py index d276a68..de8d40c 100644 --- a/whisperlivekit/audio_processor.py +++ b/whisperlivekit/audio_processor.py @@ -83,10 +83,33 @@ class AudioProcessor: def start_ffmpeg_decoder(self): """Start FFmpeg process for WebM to PCM conversion.""" - return (ffmpeg.input("pipe:0", format="webm") - .output("pipe:1", format="s16le", acodec="pcm_s16le", - ac=self.channels, ar=str(self.sample_rate)) - .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)) + try: + return (ffmpeg.input("pipe:0", format="webm") + .output("pipe:1", format="s16le", acodec="pcm_s16le", + ac=self.channels, ar=str(self.sample_rate)) + .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True)) + except FileNotFoundError: + error = """ + FFmpeg is not installed or not found in your system's PATH. + Please install FFmpeg to enable audio processing. + + Installation instructions: + + # Ubuntu/Debian: + sudo apt update && sudo apt install ffmpeg + + # macOS (using Homebrew): + brew install ffmpeg + + # Windows: + # 1. Download the latest static build from https://ffmpeg.org/download.html + # 2. Extract the archive (e.g., to C:\\FFmpeg). + # 3. Add the 'bin' directory (e.g., C:\\FFmpeg\\bin) to your system's PATH environment variable. + + After installation, please restart the application. + """ + logger.error(error) + raise FileNotFoundError(error) async def restart_ffmpeg(self): """Restart the FFmpeg process after failure.""" From 94bb05d53e2aba1a001d53e764c5dd11e6cb4543 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa <38427957+QuentinFuxa@users.noreply.github.com> Date: Wed, 28 May 2025 11:48:46 +0200 Subject: [PATCH 4/5] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1a0c1e5..185a6ec 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,8 @@

PyPI Version PyPI Downloads - Python Versions - License + Python Versions + License

## 🚀 Overview From 101ca9ef90cc22bd00e17e369dae35ee2ea6fadc Mon Sep 17 00:00:00 2001 From: Quentin Fuxa <38427957+QuentinFuxa@users.noreply.github.com> Date: Wed, 28 May 2025 11:50:44 +0200 Subject: [PATCH 5/5] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 185a6ec..f4c5bd2 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@

PyPI Version PyPI Downloads - Python Versions + Python Versions License