diff --git a/whisperlivekit/results_formater.py b/whisperlivekit/results_formater.py index 9598611..1e600ec 100644 --- a/whisperlivekit/results_formater.py +++ b/whisperlivekit/results_formater.py @@ -63,8 +63,8 @@ def format_output(state, silence, args, sep): previous_speaker = 1 undiarized_text = [] tokens = handle_silences(tokens, state.beg_loop, silence) - last_punctuation = None - for i, token in enumerate(tokens[last_validated_token:]): + for i in range(last_validated_token, len(tokens)): + token = tokens[i] speaker = int(token.speaker) token.corrected_speaker = speaker if not diarization: @@ -73,9 +73,9 @@ def format_output(state, silence, args, sep): token.validated_speaker = True else: if is_punctuation(token): - last_punctuation = i + state.last_punctuation_index = i - if last_punctuation == i-1: + if state.last_punctuation_index == i-1: if token.speaker != previous_speaker: token.validated_speaker = True # perfect, diarization perfectly aligned diff --git a/whisperlivekit/timed_objects.py b/whisperlivekit/timed_objects.py index 23dd3f5..7641178 100644 --- a/whisperlivekit/timed_objects.py +++ b/whisperlivekit/timed_objects.py @@ -176,6 +176,7 @@ class ChangeSpeaker: class State(): tokens: list = field(default_factory=list) last_validated_token: int = 0 + last_punctuation_index: Optional[int] = None translation_validated_segments: list = field(default_factory=list) translation_buffer: list = field(default_factory=list) buffer_transcription: str = field(default_factory=Transcript)