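last_validated_token handled in state: iterate tokens by absolute index starting at last_validated_token, and track the last punctuation index on State (last_punctuation_index) instead of a local variable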

This commit is contained in:
Quentin Fuxa
2025-11-10 13:18:52 +01:00
parent 13401ffe24
commit 5491dbd824
2 changed files with 5 additions and 4 deletions

View File

@@ -63,8 +63,8 @@ def format_output(state, silence, args, sep):
previous_speaker = 1
undiarized_text = []
tokens = handle_silences(tokens, state.beg_loop, silence)
last_punctuation = None
for i, token in enumerate(tokens[last_validated_token:]):
for i in range(last_validated_token, len(tokens)):
token = tokens[i]
speaker = int(token.speaker)
token.corrected_speaker = speaker
if not diarization:
@@ -73,9 +73,9 @@ def format_output(state, silence, args, sep):
token.validated_speaker = True
else:
if is_punctuation(token):
last_punctuation = i
state.last_punctuation_index = i
if last_punctuation == i-1:
if state.last_punctuation_index == i-1:
if token.speaker != previous_speaker:
token.validated_speaker = True
# perfect, diarization perfectly aligned

View File

@@ -176,6 +176,7 @@ class ChangeSpeaker:
class State():
tokens: list = field(default_factory=list)
last_validated_token: int = 0
last_punctuation_index: Optional[int] = None
translation_validated_segments: list = field(default_factory=list)
translation_buffer: list = field(default_factory=list)
buffer_transcription: str = field(default_factory=Transcript)