From 9a8d3cbd90fd8ed7048cc1578027da13271a87ce Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Sat, 24 Aug 2024 19:20:00 +0200 Subject: [PATCH] improve diarization + silence handling --- whisperlivekit/results_formater.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/whisperlivekit/results_formater.py b/whisperlivekit/results_formater.py index 5d1931f..e882cdd 100644 --- a/whisperlivekit/results_formater.py +++ b/whisperlivekit/results_formater.py @@ -125,7 +125,13 @@ def format_output(state, silence, current_time, diarization, debug): # should become: # Are you okay? |SPLIT SPEAKER| yeah, sure. Absolutely append_token_to_last_line(lines, sep, token, debug_info, last_end_diarized) - continue + continue + elif speaker == -2 or previous_speaker == -2: #silences can happen anytime + lines.append(new_line(token, speaker, last_end_diarized, debug_info = "")) + continue + else: #we create a new speaker, but that's not ideal. We are not sure about the split. We prefer to append to previous line + # lines.append(new_line(token, speaker, last_end_diarized, debug_info = "")) + pass append_token_to_last_line(lines, sep, token, debug_info, last_end_diarized) return lines, undiarized_text, buffer_transcription, ''