VAC model is now loaded in TranscriptionEngine, and VAC is enabled by default

This commit is contained in:
Quentin Fuxa
2025-08-17 00:34:25 +02:00
parent 0f2eba507e
commit 7fe0353260
3 changed files with 20 additions and 10 deletions

View File

@@ -66,10 +66,12 @@ class AudioProcessor:
self.asr = models.asr
self.tokenizer = models.tokenizer
self.diarization = models.diarization
import torch
model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
self.vac = FixedVADIterator(model)
self.vac.reset_states()
self.vac_model = models.vac_model
if self.args.vac:
self.vac = FixedVADIterator(models.vac_model)
else:
self.vac = None
self.ffmpeg_manager = FFmpegManager(
sample_rate=self.sample_rate,
channels=self.channels
@@ -218,11 +220,14 @@ class AudioProcessor:
# Process audio chunk
pcm_array = self.convert_pcm_to_float(self.pcm_buffer[:self.max_bytes_per_sec])
self.pcm_buffer = self.pcm_buffer[self.max_bytes_per_sec:]
res = self.vac(pcm_array)
res = None
end_of_audio = False
silence_buffer = None
if self.args.vac:
res = self.vac(pcm_array)
if self.silence:
print('NO AUDIO')

View File

@@ -34,7 +34,7 @@ class TranscriptionEngine:
"lan": "auto",
"task": "transcribe",
"backend": "faster-whisper",
"vac": False,
"vac": True,
"vac_chunk_size": 0.04,
"log_level": "DEBUG",
"ssl_certfile": None,
@@ -82,6 +82,11 @@ class TranscriptionEngine:
self.asr = None
self.tokenizer = None
self.diarization = None
self.vac_model = None
if self.args.vac:
import torch
self.vac_model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
if self.args.transcription:
if self.args.backend == "simulstreaming":
@@ -131,7 +136,7 @@ def online_factory(args, asr, tokenizer, logfile=sys.stderr):
logfile=logfile,
)
# warmup_online(online, args.warmup_file)
elif args.vac:
elif False: #args.vac: #vac is now handled in audio_processor
online = VACOnlineASRProcessor(
args.min_chunk_size,
asr,

View File

@@ -113,8 +113,8 @@ def parse_args():
)
parser.add_argument(
"--vac",
action="store_true",
default=False,
# action="store_true",
default=True,
help="Use VAC = voice activity controller. Recommended. Requires torch.",
)
parser.add_argument(