From a175d1a3271533e8e9460acb6373c215aa34a1cc Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Sun, 23 Nov 2025 11:20:00 +0100 Subject: [PATCH] fixes silence detected but never reported by silero --- pyproject.toml | 4 ++-- whisperlivekit/audio_processor.py | 8 ++++---- whisperlivekit/silero_vad_iterator.py | 10 ++++++---- .../{vad_models => silero_vad_models}/__init__.py | 0 .../silero_vad.jit | Bin .../silero_vad.onnx | Bin .../silero_vad_16k_op15.onnx | Bin .../silero_vad_half.onnx | Bin 8 files changed, 12 insertions(+), 10 deletions(-) rename whisperlivekit/{vad_models => silero_vad_models}/__init__.py (100%) rename whisperlivekit/{vad_models => silero_vad_models}/silero_vad.jit (100%) rename whisperlivekit/{vad_models => silero_vad_models}/silero_vad.onnx (100%) rename whisperlivekit/{vad_models => silero_vad_models}/silero_vad_16k_op15.onnx (100%) rename whisperlivekit/{vad_models => silero_vad_models}/silero_vad_half.onnx (100%) diff --git a/pyproject.toml b/pyproject.toml index d4cb040..7221669 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,10 +61,10 @@ packages = [ "whisperlivekit.whisper.normalizers", "whisperlivekit.web", "whisperlivekit.local_agreement", - "whisperlivekit.vad_models" + "whisperlivekit.silero_vad_models" ] [tool.setuptools.package-data] whisperlivekit = ["web/*.html", "web/*.css", "web/*.js", "web/src/*.svg"] "whisperlivekit.whisper.assets" = ["*.tiktoken", "*.npz"] -"whisperlivekit.vad_models" = ["*.jit", "*.onnx"] +"whisperlivekit.silero_vad_models" = ["*.jit", "*.onnx"] diff --git a/whisperlivekit/audio_processor.py b/whisperlivekit/audio_processor.py index 89fcf91..ee53c54 100644 --- a/whisperlivekit/audio_processor.py +++ b/whisperlivekit/audio_processor.py @@ -603,16 +603,16 @@ class AudioProcessor: res = self.vac(pcm_array) if res is not None: - silence_detected = res.get("end", 0) > res.get("start", 0) - if silence_detected and not self.current_silence: + if "start" in res and self.current_silence: + await self._end_silence() + + if "end" in res and not self.current_silence: pre_silence_chunk = self._slice_before_silence( pcm_array, chunk_sample_start, res.get("end") ) if pre_silence_chunk is not None and pre_silence_chunk.size > 0: await self._enqueue_active_audio(pre_silence_chunk) await self._begin_silence() - elif self.current_silence: - await self._end_silence() if not self.current_silence: await self._enqueue_active_audio(pcm_array) diff --git a/whisperlivekit/silero_vad_iterator.py b/whisperlivekit/silero_vad_iterator.py index ad2d2ba..d53d056 100644 --- a/whisperlivekit/silero_vad_iterator.py +++ b/whisperlivekit/silero_vad_iterator.py @@ -123,7 +123,7 @@ def load_silero_vad(model_path: str = None, onnx: bool = False, opset_version: i raise Exception(f'Available ONNX opset_version: {available_ops}') if model_path is None: current_dir = Path(__file__).parent - data_dir = current_dir / 'vad_models' + data_dir = current_dir / 'silero_vad_models' if onnx: if opset_version == 16: @@ -138,7 +138,7 @@ def load_silero_vad(model_path: str = None, onnx: bool = False, opset_version: i if not model_path.exists(): raise FileNotFoundError( f"Model file not found: {model_path}\n" - f"Please ensure the whisperlivekit/vad_models/ directory contains the model files." + f"Please ensure the whisperlivekit/silero_vad_models/ directory contains the model files." ) else: model_path = Path(model_path) @@ -276,8 +276,10 @@ class FixedVADIterator(VADIterator): elif r is not None: if "end" in r: ret["end"] = r["end"] - if "start" in r and "end" in ret: - del ret["end"] + if "start" in r: + ret["start"] = r["start"] + if "end" in ret: + del ret["end"] return ret if ret != {} else None diff --git a/whisperlivekit/vad_models/__init__.py b/whisperlivekit/silero_vad_models/__init__.py similarity index 100% rename from whisperlivekit/vad_models/__init__.py rename to whisperlivekit/silero_vad_models/__init__.py diff --git a/whisperlivekit/vad_models/silero_vad.jit b/whisperlivekit/silero_vad_models/silero_vad.jit similarity index 100% rename from whisperlivekit/vad_models/silero_vad.jit rename to whisperlivekit/silero_vad_models/silero_vad.jit diff --git a/whisperlivekit/vad_models/silero_vad.onnx b/whisperlivekit/silero_vad_models/silero_vad.onnx similarity index 100% rename from whisperlivekit/vad_models/silero_vad.onnx rename to whisperlivekit/silero_vad_models/silero_vad.onnx diff --git a/whisperlivekit/vad_models/silero_vad_16k_op15.onnx b/whisperlivekit/silero_vad_models/silero_vad_16k_op15.onnx similarity index 100% rename from whisperlivekit/vad_models/silero_vad_16k_op15.onnx rename to whisperlivekit/silero_vad_models/silero_vad_16k_op15.onnx diff --git a/whisperlivekit/vad_models/silero_vad_half.onnx b/whisperlivekit/silero_vad_models/silero_vad_half.onnx similarity index 100% rename from whisperlivekit/vad_models/silero_vad_half.onnx rename to whisperlivekit/silero_vad_models/silero_vad_half.onnx