Fix warmup file behavior

This commit is contained in:
notV3NOM
2025-09-13 20:44:24 +05:30
parent babe93b99a
commit ebaf36a8be
3 changed files with 33 additions and 36 deletions

View File

@@ -120,7 +120,7 @@ class TranscriptionEngine:
else:
self.asr, self.tokenizer = backend_factory(self.args)
warmup_asr(self.asr, self.args.warmup_file) #for simulstreaming, warmup should be done in the online class not here
warmup_asr(self.asr, self.args.warmup_file) #for simulstreaming, warmup should be done in the online class not here
if self.args.diarization:
if self.args.diarization_backend == "diart":

View File

@@ -20,7 +20,7 @@ def parse_args():
help="""
The path to a speech audio wav file to warm up Whisper so that the very first chunk processing is fast.
If not set, uses https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav.
If False, no warmup is performed.
If empty, no warmup is performed.
""",
)

View File

@@ -6,57 +6,54 @@ logger = logging.getLogger(__name__)
def load_file(warmup_file=None, timeout=5):
import os
import tempfile
import urllib.request
import librosa
if warmup_file == "":
logger.info(f"Skipping warmup.")
return None
# Download JFK sample if not already present
if warmup_file is None:
# Download JFK sample if not already present
jfk_url = "https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav"
temp_dir = tempfile.gettempdir()
warmup_file = os.path.join(temp_dir, "whisper_warmup_jfk.wav")
if not os.path.exists(warmup_file):
logger.debug(f"Downloading warmup file from {jfk_url}")
print(f"Downloading warmup file from {jfk_url}")
import time
import urllib.request
import urllib.error
import socket
original_timeout = socket.getdefaulttimeout()
socket.setdefaulttimeout(timeout)
start_time = time.time()
if not os.path.exists(warmup_file) or os.path.getsize(warmup_file) == 0:
try:
urllib.request.urlretrieve(jfk_url, warmup_file)
logger.debug(f"Download successful in {time.time() - start_time:.2f}s")
except (urllib.error.URLError, socket.timeout) as e:
logger.warning(f"Download failed: {e}. Proceeding without warmup.")
logger.debug(f"Downloading warmup file from {jfk_url}")
with urllib.request.urlopen(jfk_url, timeout=timeout) as r, open(warmup_file, "wb") as f:
f.write(r.read())
except Exception as e:
logger.warning(f"Warmup file download failed: {e}.")
return None
finally:
socket.setdefaulttimeout(original_timeout)
elif not warmup_file:
return None
if not warmup_file or not os.path.exists(warmup_file) or os.path.getsize(warmup_file) == 0:
logger.warning(f"Warmup file {warmup_file} invalid or missing.")
# Validate file and load
if not os.path.exists(warmup_file) or os.path.getsize(warmup_file) == 0:
logger.warning(f"Warmup file {warmup_file} is invalid or missing.")
return None
try:
audio, sr = librosa.load(warmup_file, sr=16000)
audio, _ = librosa.load(warmup_file, sr=16000)
return audio
except Exception as e:
logger.warning(f"Failed to load audio file: {e}")
logger.warning(f"Failed to load warmup file: {e}")
return None
return audio
def warmup_asr(asr, warmup_file=None, timeout=5):
"""
Warmup the ASR model by transcribing a short audio file.
"""
audio = load_file(warmup_file=None, timeout=5)
audio = load_file(warmup_file=warmup_file, timeout=timeout)
if audio is None:
logger.warning("Warmup file unavailable. Skipping ASR warmup.")
return
asr.transcribe(audio)
logger.info("ASR model is warmed up")
logger.info("ASR model is warmed up.")
def warmup_online(online, warmup_file=None, timeout=5):
audio = load_file(warmup_file=None, timeout=5)
audio = load_file(warmup_file=warmup_file, timeout=timeout)
if audio is None:
logger.warning("Warmup file unavailable. Skipping online warmup.")
return
online.warmup(audio)
logger.warning("ASR is warmed up")
logger.info("Online processor is warmed up.")