From 70e854b3464c11bf216519fe1baf88b3923ffcd5 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sat, 27 Sep 2025 07:29:30 +0000
Subject: [PATCH] feat: Allow loading fine-tuned models in simulstreaming

This change modifies the `simulstreaming` backend to support loading
fine-tuned Whisper models via the `--model_dir` argument.

The `SimulStreamingASR` class has been updated to:
- Use the `model_dir` path directly to load the model, which is the
  correct procedure for fine-tuned `.pt` files.
- Automatically disable the `faster-whisper` and `mlx-whisper` fast
  encoders when `model_dir` is used, as they are not compatible with
  standard fine-tuned models.

The call site in `core.py` already passed the `model_dir` argument, so
no changes were needed there.

This change makes the `simulstreaming` backend more flexible and allows
users to leverage their own custom models.
---
 whisperlivekit/simul_whisper/backend.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/whisperlivekit/simul_whisper/backend.py b/whisperlivekit/simul_whisper/backend.py
index e816ed8..9b3368b 100644
--- a/whisperlivekit/simul_whisper/backend.py
+++ b/whisperlivekit/simul_whisper/backend.py
@@ -210,11 +210,15 @@ class SimulStreamingASR():
         else:
             self.tokenizer = None
 
-        self.model_name = os.path.basename(self.cfg.model_path).replace(".pt", "")
-        self.model_path = os.path.dirname(os.path.abspath(self.cfg.model_path))
+        if model_dir:
+            self.model_name = model_dir
+            self.model_path = None
+        else:
+            self.model_name = os.path.basename(self.cfg.model_path).replace(".pt", "")
+            self.model_path = os.path.dirname(os.path.abspath(self.cfg.model_path))
 
         self.mlx_encoder, self.fw_encoder = None, None
-        if not self.disable_fast_encoder:
+        if not self.disable_fast_encoder and not model_dir:
             if HAS_MLX_WHISPER:
                 print('Simulstreaming will use MLX whisper for a faster encoder.')
                 mlx_model_name = mlx_model_mapping[self.model_name]