mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-07 14:23:18 +00:00
Merge pull request #211 from Alexander-ARTV/main
Fix TypeError when setting encoder_feature in simul_whisper->infer for the faster-whisper encoder
This commit is contained in:
@@ -408,8 +408,13 @@ class PaddedAlignAttWhisper:
|
||||
content_mel_len = int(audio_length_seconds * 100)//2
|
||||
mel_padded_2 = self.fw_feature_extractor(waveform=input_segments.numpy(), padding=N_SAMPLES)[None, :]
|
||||
mel = fw_pad_or_trim(mel_padded_2, N_FRAMES, axis=-1)
|
||||
encoder_feature_ctranslate = np.array(self.fw_encoder.encode(mel))
|
||||
encoder_feature = torch.as_tensor(encoder_feature_ctranslate, device=self.device)
|
||||
encoder_feature_ctranslate = self.fw_encoder.encode(mel)
|
||||
if self.device == 'cpu': #it seems that on gpu, passing StorageView to torch.as_tensor fails and wrapping in the array works
|
||||
encoder_feature_ctranslate = np.array(encoder_feature_ctranslate)
|
||||
try:
|
||||
encoder_feature = torch.as_tensor(encoder_feature_ctranslate, device=self.device)
|
||||
except TypeError: # Normally the cpu condition should prevent having exceptions, but just in case:
|
||||
encoder_feature = torch.as_tensor(np.array(encoder_feature_ctranslate), device=self.device)
|
||||
else:
|
||||
# mel + padding to 30s
|
||||
mel_padded = log_mel_spectrogram(input_segments, n_mels=self.model.dims.n_mels, padding=N_SAMPLES,
|
||||
|
||||
Reference in New Issue
Block a user