From c7b3bb5e582169e9b616b397245e45f9b6b33eee Mon Sep 17 00:00:00 2001 From: Alexander Lindberg Date: Tue, 9 Sep 2025 11:18:55 +0300 Subject: [PATCH] Fix regression with faster-whisper encoder_feature --- whisperlivekit/simul_whisper/simul_whisper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisperlivekit/simul_whisper/simul_whisper.py b/whisperlivekit/simul_whisper/simul_whisper.py index 3b0e7c5..5707e41 100644 --- a/whisperlivekit/simul_whisper/simul_whisper.py +++ b/whisperlivekit/simul_whisper/simul_whisper.py @@ -408,7 +408,7 @@ class PaddedAlignAttWhisper: content_mel_len = int(audio_length_seconds * 100)//2 mel_padded_2 = self.fw_feature_extractor(waveform=input_segments.numpy(), padding=N_SAMPLES)[None, :] mel = fw_pad_or_trim(mel_padded_2, N_FRAMES, axis=-1) - encoder_feature_ctranslate = np.array(self.fw_encoder.encode(mel)) + encoder_feature_ctranslate = self.fw_encoder.encode(mel) encoder_feature = torch.as_tensor(encoder_feature_ctranslate, device=self.device) else: # mel + padding to 30s