From bfd60b39219d8502b8d601672453a3c233f90ae1 Mon Sep 17 00:00:00 2001
From: Quentin Fuxa <quentin.fuxa@gmail.com>
Date: Mon, 17 Nov 2025 22:52:00 +0100
Subject: [PATCH] Handle partial silence within audio chunks. Bump to
 0.2.14.post2

---
 pyproject.toml                    |  2 +-
 whisperlivekit/audio_processor.py | 42 +++++++++++++++++++++++++------
 2 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5b96028..d387377 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "whisperlivekit"
-version = "0.2.14"
+version = "0.2.14.post2"
 description = "Real-time speech-to-text with speaker diarization using Whisper"
 readme = "README.md"
 authors = [
diff --git a/whisperlivekit/audio_processor.py b/whisperlivekit/audio_processor.py
index 41225e8..b3e0219 100644
--- a/whisperlivekit/audio_processor.py
+++ b/whisperlivekit/audio_processor.py
@@ -127,6 +127,7 @@ class AudioProcessor:
         self.diarization_queue = asyncio.Queue() if self.args.diarization else None
         self.translation_queue = asyncio.Queue() if self.args.target_language else None
         self.pcm_buffer = bytearray()
+        self.total_pcm_samples = 0
 
         self.transcription_task = None
         self.diarization_task = None
@@ -174,6 +175,26 @@ class AudioProcessor:
         self.start_silence = None
         self.last_silence_dispatch_time = None
 
+    async def _enqueue_active_audio(self, pcm_chunk: np.ndarray):
+        """Queue copies of a non-empty chunk for transcription/diarization; reset silence_duration."""
+        if pcm_chunk is not None and pcm_chunk.size > 0:
+            if not self.diarization_before_transcription and self.transcription_queue:
+                await self.transcription_queue.put(pcm_chunk.copy())
+            if self.args.diarization and self.diarization_queue:
+                await self.diarization_queue.put(pcm_chunk.copy())
+            self.silence_duration = 0.0
+
+    def _slice_before_silence(self, pcm_array, chunk_sample_start, silence_sample):
+        """Return the leading part of pcm_array that precedes silence_sample, else None."""
+        if silence_sample is None:
+            return None
+        # Express the position relative to this chunk, then clamp it to the chunk length.
+        offset = int(silence_sample - chunk_sample_start)
+        cut = min(offset, len(pcm_array)) if offset > 0 else 0
+        if cut <= 0:
+            return None
+        return pcm_array[:cut]
+
     def convert_pcm_to_float(self, pcm_buffer):
         """Convert PCM buffer in s16le format to normalized NumPy array."""
         return np.frombuffer(pcm_buffer, dtype=np.int16).astype(np.float32) / 32768.0
@@ -669,25 +690,30 @@ class AudioProcessor:
         pcm_array = self.convert_pcm_to_float(self.pcm_buffer[:aligned_chunk_size])
         self.pcm_buffer = self.pcm_buffer[aligned_chunk_size:]
 
+        num_samples = len(pcm_array)
+        chunk_sample_start = self.total_pcm_samples
+        chunk_sample_end = chunk_sample_start + num_samples
+
         res = None
         if self.args.vac:
             res = self.vac(pcm_array)
 
         if res is not None:
-            if res.get("end", 0) > res.get("start", 0) and not self.silence:
+            silence_detected = res.get("end", 0) > res.get("start", 0)
+            if silence_detected and not self.silence:
+                pre_silence_chunk = self._slice_before_silence(
+                    pcm_array, chunk_sample_start, res.get("end")
+                )
+                if pre_silence_chunk is not None and pre_silence_chunk.size > 0:
+                    await self._enqueue_active_audio(pre_silence_chunk)
                 await self._begin_silence()
             elif self.silence:
                 await self._end_silence()
 
-
         if not self.silence:
-            if not self.diarization_before_transcription and self.transcription_queue:
-                await self.transcription_queue.put(pcm_array.copy())
+            await self._enqueue_active_audio(pcm_array)
 
-            if self.args.diarization and self.diarization_queue:
-                await self.diarization_queue.put(pcm_array.copy())
-
-            self.silence_duration = 0.0
+        self.total_pcm_samples = chunk_sample_end
 
         if not self.args.transcription and not self.args.diarization:
             await asyncio.sleep(0.1)