mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-07 14:23:18 +00:00
vac model is loaded in TranscriptionEngine, and by default
This commit is contained in:
@@ -66,10 +66,12 @@ class AudioProcessor:
|
||||
self.asr = models.asr
|
||||
self.tokenizer = models.tokenizer
|
||||
self.diarization = models.diarization
|
||||
import torch
|
||||
model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
|
||||
self.vac = FixedVADIterator(model)
|
||||
self.vac.reset_states()
|
||||
self.vac_model = models.vac_model
|
||||
if self.args.vac:
|
||||
self.vac = FixedVADIterator(models.vac_model)
|
||||
else:
|
||||
self.vac = None
|
||||
|
||||
self.ffmpeg_manager = FFmpegManager(
|
||||
sample_rate=self.sample_rate,
|
||||
channels=self.channels
|
||||
@@ -218,11 +220,14 @@ class AudioProcessor:
|
||||
# Process audio chunk
|
||||
pcm_array = self.convert_pcm_to_float(self.pcm_buffer[:self.max_bytes_per_sec])
|
||||
self.pcm_buffer = self.pcm_buffer[self.max_bytes_per_sec:]
|
||||
res = self.vac(pcm_array)
|
||||
|
||||
|
||||
res = None
|
||||
end_of_audio = False
|
||||
silence_buffer = None
|
||||
|
||||
if self.args.vac:
|
||||
res = self.vac(pcm_array)
|
||||
|
||||
if self.silence:
|
||||
print('NO AUDIO')
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ class TranscriptionEngine:
|
||||
"lan": "auto",
|
||||
"task": "transcribe",
|
||||
"backend": "faster-whisper",
|
||||
"vac": False,
|
||||
"vac": True,
|
||||
"vac_chunk_size": 0.04,
|
||||
"log_level": "DEBUG",
|
||||
"ssl_certfile": None,
|
||||
@@ -82,6 +82,11 @@ class TranscriptionEngine:
|
||||
self.asr = None
|
||||
self.tokenizer = None
|
||||
self.diarization = None
|
||||
self.vac_model = None
|
||||
|
||||
if self.args.vac:
|
||||
import torch
|
||||
self.vac_model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
|
||||
|
||||
if self.args.transcription:
|
||||
if self.args.backend == "simulstreaming":
|
||||
@@ -131,7 +136,7 @@ def online_factory(args, asr, tokenizer, logfile=sys.stderr):
|
||||
logfile=logfile,
|
||||
)
|
||||
# warmup_online(online, args.warmup_file)
|
||||
elif args.vac:
|
||||
elif False: #args.vac: #vac is now handled in audio_processor
|
||||
online = VACOnlineASRProcessor(
|
||||
args.min_chunk_size,
|
||||
asr,
|
||||
|
||||
@@ -113,8 +113,8 @@ def parse_args():
|
||||
)
|
||||
parser.add_argument(
|
||||
"--vac",
|
||||
action="store_true",
|
||||
default=False,
|
||||
# action="store_true",
|
||||
default=True,
|
||||
help="Use VAC = voice activity controller. Recommended. Requires torch.",
|
||||
)
|
||||
parser.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user