diff --git a/whisperlivekit/audio_processor.py b/whisperlivekit/audio_processor.py index 6fb9b19..89fcf91 100644 --- a/whisperlivekit/audio_processor.py +++ b/whisperlivekit/audio_processor.py @@ -361,21 +361,25 @@ class AudioProcessor: # in the future we want to have different languages for each speaker etc, so it will be more complex. while True: try: - tokens_to_process = await get_all_from_queue(self.translation_queue) - if tokens_to_process is SENTINEL: + item = await get_all_from_queue(self.translation_queue) + if item is SENTINEL: logger.debug("Translation processor received sentinel. Finishing.") - self.translation_queue.task_done() break - elif type(tokens_to_process) is Silence: - if tokens_to_process.has_ended: - self.translation.insert_silence(tokens_to_process.duration) - continue - if tokens_to_process: - self.translation.insert_tokens(tokens_to_process) - translation_validated_segments, buffer_translation = await asyncio.to_thread(self.translation.process) - async with self.lock: - self.state.new_translation = translation_validated_segments - self.state.new_translation_buffer = buffer_translation + elif type(item) is Silence: + if item.is_starting: + new_translation, new_translation_buffer = self.translation.validate_buffer_and_reset() + if item.has_ended: + self.translation.insert_silence(item.duration) + continue + elif isinstance(item, ChangeSpeaker): + new_translation, new_translation_buffer = self.translation.validate_buffer_and_reset() + pass + else: + self.translation.insert_tokens(item) + new_translation, new_translation_buffer = await asyncio.to_thread(self.translation.process) + async with self.lock: + self.state.new_translation.append(new_translation) + self.state.new_translation_buffer = new_translation_buffer except Exception as e: logger.warning(f"Exception in translation_processor: {e}") logger.warning(f"Traceback: {traceback.format_exc()}") diff --git a/whisperlivekit/timed_objects.py b/whisperlivekit/timed_objects.py index d0854e0..d75bf50 100644 --- a/whisperlivekit/timed_objects.py +++ b/whisperlivekit/timed_objects.py @@ -233,4 +233,4 @@ class State(): new_translation: List[Any] = field(default_factory=list) new_diarization: List[Any] = field(default_factory=list) new_tokens_buffer: List[Any] = field(default_factory=list) # only when local agreement - new_translation_buffer: str = '' \ No newline at end of file + new_translation_buffer= TimedText() \ No newline at end of file diff --git a/whisperlivekit/tokens_alignment.py b/whisperlivekit/tokens_alignment.py index 4711f52..3b694a5 100644 --- a/whisperlivekit/tokens_alignment.py +++ b/whisperlivekit/tokens_alignment.py @@ -33,15 +33,13 @@ class TokensAlignment: self.all_tokens.extend(self.new_tokens) self.all_diarization_segments.extend(self.new_diarization) - # self.all_translation_segments.extend(self.new_translation) #future - self.all_translation_segments = self.new_translation if self.new_translation != [] else self.all_translation_segments - self.new_translation_buffer = self.state.new_translation_buffer if self.new_translation else self.new_translation_buffer - self.new_translation_buffer = self.new_translation_buffer if type(self.new_translation_buffer) == str else self.new_translation_buffer.text + self.all_translation_segments.extend(self.new_translation) + self.new_translation_buffer = self.state.new_translation_buffer def add_translation(self, line: Line) -> None: for ts in self.all_translation_segments: if ts.is_within(line): - line.translation += ts.text + self.sep + line.translation += ts.text + (self.sep if ts.text else '') elif line.translation: break @@ -175,4 +173,4 @@ class TokensAlignment: )) if translation: [self.add_translation(line) for line in lines if not type(line) == Silence] - return lines, diarization_buffer, self.new_translation_buffer + return lines, diarization_buffer, self.new_translation_buffer.text