From 4a5d5e1f3b336b06127e44f55cef51d9594e5123 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Fri, 29 Aug 2025 17:44:46 +0200 Subject: [PATCH] raise Exception when language == auto and task == translation --- whisperlivekit/audio_processor.py | 4 ++-- whisperlivekit/remove_silences.py | 2 +- whisperlivekit/simul_whisper/backend.py | 19 ++++++++++++------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/whisperlivekit/audio_processor.py b/whisperlivekit/audio_processor.py index c015639..01d79d5 100644 --- a/whisperlivekit/audio_processor.py +++ b/whisperlivekit/audio_processor.py @@ -120,7 +120,7 @@ class AudioProcessor: async def add_dummy_token(self): """Placeholder token when no transcription is available.""" async with self.lock: - current_time = time() - self.beg_loop + current_time = time() - self.beg_loop if self.beg_loop else 0 self.tokens.append(ASRToken( start=current_time, end=current_time + 1, text=".", speaker=-1, is_dummy=True @@ -295,7 +295,7 @@ class AudioProcessor: if type(item) is Silence: cumulative_pcm_duration_stream_time += item.duration - self.online.insert_silence(item.duration, self.tokens[-1].end) + self.online.insert_silence(item.duration, self.tokens[-1].end if self.tokens else 0) continue if isinstance(item, np.ndarray): diff --git a/whisperlivekit/remove_silences.py b/whisperlivekit/remove_silences.py index 5091f00..dc207fc 100644 --- a/whisperlivekit/remove_silences.py +++ b/whisperlivekit/remove_silences.py @@ -81,7 +81,7 @@ def ends_with_silence(tokens, buffer_transcription, buffer_diarization, current_ if not tokens: return [], buffer_transcription, buffer_diarization last_token = tokens[-1] - if tokens and ( + if tokens and current_time and ( current_time - last_token.end >= END_SILENCE_DURATION or (current_time - last_token.end >= 3 and vac_detected_silence) diff --git a/whisperlivekit/simul_whisper/backend.py b/whisperlivekit/simul_whisper/backend.py index 1764a0a..4d3eaa6 100644 --- a/whisperlivekit/simul_whisper/backend.py +++ b/whisperlivekit/simul_whisper/backend.py @@ -42,6 +42,8 @@ class SimulStreamingOnlineProcessor: self.committed: List[ASRToken] = [] self.last_result_tokens: List[ASRToken] = [] self.load_new_backend() + + #can be moved if asr.tokenizer: self.model.tokenizer = asr.tokenizer @@ -249,11 +251,6 @@ class SimulStreamingASR(): } self.model_path = model_mapping.get(modelsize, f'./{modelsize}.pt') - # Set up tokenizer for translation if needed - if self.task == "translate": - self.tokenizer = self.set_translate_task() - else: - self.tokenizer = None self.cfg = AlignAttConfig( model_path=self.model_path, segment_length=self.segment_length, @@ -271,6 +268,12 @@ class SimulStreamingASR(): static_init_prompt=self.static_init_prompt, ) + # Set up tokenizer for translation if needed + if self.task == "translate": + self.tokenizer = self.set_translate_task() + else: + self.tokenizer = None + self.model_name = os.path.basename(self.cfg.model_path).replace(".pt", "") self.model_path = os.path.dirname(os.path.abspath(self.cfg.model_path)) self.models = [self.load_model() for i in range(self.preload_model_count)] @@ -301,10 +304,12 @@ class SimulStreamingASR(): def set_translate_task(self): """Set up translation task.""" + if self.cfg.language == 'auto': + raise Exception('Translation cannot be done with language = auto') return tokenizer.get_tokenizer( multilingual=True, - language=self.model.cfg.language, - num_languages=self.model.model.num_languages, + language=self.cfg.language, + num_languages=99, task="translate" )