This commit is contained in:
Rodrigo
2023-12-09 17:12:43 -03:00
parent fe4207edca
commit 324dee03e7
3 changed files with 6 additions and 6 deletions

View File

@@ -72,12 +72,12 @@ SAMPLING_RATE = 16000
model = "large-v2"
src_lan = "en" # source language
tgt_lan = "en" # target language -- same as source for ASR, "en" if translate task is used
use_vad_result = True
use_vad = False
min_sample_length = 1 * SAMPLING_RATE
vad = VoiceActivityController(use_vad_result = use_vad_result)
vac = VoiceActivityController(use_vad_result = use_vad)
asr = FasterWhisperASR(src_lan, "large-v2") # loads and wraps Whisper model
tokenizer = create_tokenizer(tgt_lan)
@@ -85,7 +85,7 @@ online = SimpleASRProcessor(asr)
stream = MicrophoneStream()
stream = vad.detect_user_speech(stream, audio_in_int16 = False)
stream = vac.detect_user_speech(stream, audio_in_int16 = False)
stream = online.stream_process(stream)
for isFinal, text in stream:

View File

@@ -13,7 +13,7 @@ model = "large-v2"
src_lan = "en" # source language
tgt_lan = "en" # target language -- same as source for ASR, "en" if translate task is used
use_vad_result = True
min_sample_length = 1.5 * SAMPLING_RATE
min_sample_length = 1 * SAMPLING_RATE
@@ -54,12 +54,12 @@ for iter in vad.detect_user_speech(microphone_stream): # processing loop:
if is_final:
o = online.finish()
online.init()
# final_processing_pending = False
print('-----'*10)
complete_text = complete_text + o[2]
print('FINAL - '+ complete_text) # do something with current partial output
print('-----'*10)
online.init()
out = []
out_len = 0

View File

@@ -76,7 +76,7 @@ class VoiceActivityController:
if self.current_sample - self.temp_end < self.min_silence_samples:
return audio, 0, window_size_samples
else:
return np.array([], dtype=np.float16) , 0, window_size_samples
return np.array([], dtype=np.float16) if self.use_vad_result else audio, 0, window_size_samples