improve diarization + silence handling

This commit is contained in:
Quentin Fuxa
2024-08-24 19:20:00 +02:00
parent b101ce06bd
commit 9a8d3cbd90

View File

@@ -125,7 +125,13 @@ def format_output(state, silence, current_time, diarization, debug):
# should become:
# Are you okay? |SPLIT SPEAKER| yeah, sure. Absolutely
append_token_to_last_line(lines, sep, token, debug_info, last_end_diarized)
continue
continue
elif speaker == -2 or previous_speaker == -2: #silences can happen anytime
lines.append(new_line(token, speaker, last_end_diarized, debug_info = ""))
continue
else: #we could create a new speaker here, but that's not ideal. We are not sure about the split, so we prefer to append to the previous line
# lines.append(new_line(token, speaker, last_end_diarized, debug_info = ""))
pass
append_token_to_last_line(lines, sep, token, debug_info, last_end_diarized)
return lines, undiarized_text, buffer_transcription, ''