Trim buffer when no new ASR tokens are issued

This commit is contained in:
Quentin Fuxa
2025-06-30 11:55:07 +02:00
parent 7c0768e8f3
commit f668570292
2 changed files with 18 additions and 10 deletions

View File

@@ -233,14 +233,6 @@ class AudioProcessor:
buffer_size = max(int(32000 * elapsed_time), 4096)
beg = current_time
# Detect idle state much more quickly
if current_time - self.last_ffmpeg_activity > self.ffmpeg_max_idle_time:
logger.warning(f"FFmpeg process idle for {current_time - self.last_ffmpeg_activity:.2f}s. Restarting...")
await self.restart_ffmpeg()
beg = time()
self.last_ffmpeg_activity = time()
continue
chunk = await loop.run_in_executor(None, self.ffmpeg_process.stdout.read, buffer_size)
if chunk:
self.last_ffmpeg_activity = time()
@@ -554,9 +546,9 @@ class AudioProcessor:
logger.info(f"{task_name} completed normally.")
ffmpeg_idle_time = current_time - self.last_ffmpeg_activity
if ffmpeg_idle_time > 15:
if ffmpeg_idle_time > 10:
logger.warning(f"FFmpeg idle for {ffmpeg_idle_time:.2f}s - may need attention.")
if ffmpeg_idle_time > 30 and not self.is_stopping:
if ffmpeg_idle_time > 15 and not self.is_stopping:
logger.error("FFmpeg idle for too long and not in stopping phase, forcing restart.")
await self.restart_ffmpeg()
except asyncio.CancelledError:

View File

@@ -154,6 +154,7 @@ class OnlineASRProcessor:
self.buffer_time_offset = offset if offset is not None else 0.0
self.transcript_buffer.last_committed_time = self.buffer_time_offset
self.committed: List[ASRToken] = []
self.time_of_last_asr_output = 0.0
def get_audio_buffer_end_time(self) -> float:
"""Returns the absolute end time of the current audio_buffer."""
@@ -210,11 +211,26 @@ class OnlineASRProcessor:
self.transcript_buffer.insert(tokens, self.buffer_time_offset)
committed_tokens = self.transcript_buffer.flush()
self.committed.extend(committed_tokens)
if committed_tokens:
self.time_of_last_asr_output = self.committed[-1].end
completed = self.concatenate_tokens(committed_tokens)
logger.debug(f">>>> COMPLETE NOW: {completed.text}")
incomp = self.concatenate_tokens(self.transcript_buffer.buffer)
logger.debug(f"INCOMPLETE: {incomp.text}")
buffer_duration = len(self.audio_buffer) / self.SAMPLING_RATE
if not committed_tokens and buffer_duration > self.buffer_trimming_sec:
time_since_last_output = self.get_audio_buffer_end_time() - self.time_of_last_asr_output
if time_since_last_output > self.buffer_trimming_sec:
logger.warning(
f"No ASR output for {time_since_last_output:.2f}s. "
f"Resetting buffer to prevent freezing."
)
self.init(offset=self.get_audio_buffer_end_time())
return [], current_audio_processed_upto
if committed_tokens and self.buffer_trimming_way == "sentence":
if len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec:
self.chunk_completed_sentence()