From 4a6868e3e1c35aea3c8a8d0899b0dd5c10673a67 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Sun, 22 Feb 2026 21:13:21 +0100 Subject: [PATCH] correct processor attributes mixtral --- whisperlivekit/voxtral_hf_streaming.py | 45 +++++++++++++++----------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/whisperlivekit/voxtral_hf_streaming.py b/whisperlivekit/voxtral_hf_streaming.py index 2fee95f..89ffbd7 100644 --- a/whisperlivekit/voxtral_hf_streaming.py +++ b/whisperlivekit/voxtral_hf_streaming.py @@ -85,10 +85,11 @@ class VoxtralHFStreamingOnlineProcessor: processor = asr.processor self._first_chunk_samples = processor.num_samples_first_audio_chunk self._chunk_samples = processor.num_samples_per_audio_chunk - self._chunk_step = processor.num_samples_per_audio_chunk_step - self._right_pad_samples = int( - processor.num_right_pad_tokens * processor.raw_audio_length_per_tok - ) + self._chunk_step = processor.raw_audio_length_per_tok + n_right_pad = processor.num_right_pad_tokens + if callable(n_right_pad): + n_right_pad = n_right_pad() + self._right_pad_samples = int(n_right_pad * processor.raw_audio_length_per_tok) self._seconds_per_token = processor.raw_audio_length_per_tok / self.SAMPLING_RATE self._reset_state() @@ -238,10 +239,16 @@ class VoxtralHFStreamingOnlineProcessor: def run_generate(): try: with torch.no_grad(): + # Pass generator as input_features — the model detects GeneratorType + # and internally converts it to input_features_generator + generate_kwargs = { + k: v for k, v in first_inputs.items() + if k != "input_features" + } model.generate( - input_features_generator=input_features_gen(), + input_features=input_features_gen(), streamer=streamer, - **first_inputs, + **generate_kwargs, ) except Exception as e: logger.error(f"[voxtral-hf] generate error: {e}", exc_info=True) @@ -271,18 +278,20 @@ class VoxtralHFStreamingOnlineProcessor: if not self._generate_started: return - streamer = self._streamer - try: - for text_fragment in streamer: - if text_fragment: - with self._text_lock: - self._accumulated_text += text_fragment - self._n_text_tokens_received += 1 - # Check if more is immediately available (non-blocking) - if streamer.text_queue.empty(): - break - except StopIteration: - pass + text_queue = self._streamer.text_queue + while True: + try: + text_fragment = text_queue.get_nowait() + except queue.Empty: + break + # TextIteratorStreamer uses None as end-of-stream sentinel + if text_fragment is None: + self._generate_finished = True + break + if text_fragment: + with self._text_lock: + self._accumulated_text += text_fragment + self._n_text_tokens_received += 1 # ── Word extraction ──