Reset the ASR audio buffer when no new tokens are committed; adjust FFmpeg idle handling

2026-03-07 22:33:36 +00:00 · 2025-06-30 11:55:07 +02:00
parent 7c0768e8f3
commit f668570292
2 changed files with 18 additions and 10 deletions
--- a/whisperlivekit/audio_processor.py
+++ b/whisperlivekit/audio_processor.py
@@ -233,14 +233,6 @@ class AudioProcessor:
                buffer_size = max(int(32000 * elapsed_time), 4096)
                beg = current_time

-                # Detect idle state much more quickly
-                if current_time - self.last_ffmpeg_activity > self.ffmpeg_max_idle_time:
-                    logger.warning(f"FFmpeg process idle for {current_time - self.last_ffmpeg_activity:.2f}s. Restarting...")
-                    await self.restart_ffmpeg()
-                    beg = time()
-                    self.last_ffmpeg_activity = time()
-                    continue
-
                chunk = await loop.run_in_executor(None, self.ffmpeg_process.stdout.read, buffer_size)
                if chunk:
                    self.last_ffmpeg_activity = time()
@@ -554,9 +546,9 @@ class AudioProcessor:
                            logger.info(f"{task_name} completed normally.")
                
                ffmpeg_idle_time = current_time - self.last_ffmpeg_activity
-                if ffmpeg_idle_time > 15:
+                if ffmpeg_idle_time > 10:
                    logger.warning(f"FFmpeg idle for {ffmpeg_idle_time:.2f}s - may need attention.")
-                    if ffmpeg_idle_time > 30 and not self.is_stopping:
+                    if ffmpeg_idle_time > 15 and not self.is_stopping:
                        logger.error("FFmpeg idle for too long and not in stopping phase, forcing restart.")
                        await self.restart_ffmpeg()
            except asyncio.CancelledError:
--- a/whisperlivekit/whisper_streaming_custom/online_asr.py
+++ b/whisperlivekit/whisper_streaming_custom/online_asr.py
@@ -154,6 +154,7 @@ class OnlineASRProcessor:
        self.buffer_time_offset = offset if offset is not None else 0.0
        self.transcript_buffer.last_committed_time = self.buffer_time_offset
        self.committed: List[ASRToken] = []
+        self.time_of_last_asr_output = 0.0

    def get_audio_buffer_end_time(self) -> float:
        """Returns the absolute end time of the current audio_buffer."""
@@ -210,11 +211,26 @@ class OnlineASRProcessor:
        self.transcript_buffer.insert(tokens, self.buffer_time_offset)
        committed_tokens = self.transcript_buffer.flush()
        self.committed.extend(committed_tokens)
+
+        if committed_tokens:
+            self.time_of_last_asr_output = self.committed[-1].end
+
        completed = self.concatenate_tokens(committed_tokens)
        logger.debug(f">>>> COMPLETE NOW: {completed.text}")
        incomp = self.concatenate_tokens(self.transcript_buffer.buffer)
        logger.debug(f"INCOMPLETE: {incomp.text}")

+        buffer_duration = len(self.audio_buffer) / self.SAMPLING_RATE
+        if not committed_tokens and buffer_duration > self.buffer_trimming_sec:
+            time_since_last_output = self.get_audio_buffer_end_time() - self.time_of_last_asr_output
+            if time_since_last_output > self.buffer_trimming_sec:
+                logger.warning(
+                    f"No ASR output for {time_since_last_output:.2f}s. "
+                    f"Resetting buffer to prevent freezing."
+                )
+                self.init(offset=self.get_audio_buffer_end_time())
+                return [], current_audio_processed_upto
+
        if committed_tokens and self.buffer_trimming_way == "sentence":
            if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec:
                self.chunk_completed_sentence()