auto detection of language for warmup if not indicated

2026-03-07 14:23:18 +00:00 · 2025-08-27 20:37:48 +02:00
parent ab98c31f16
commit 9895bc83bf
2 changed files with 4 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -148,7 +148,7 @@ The package includes an HTML/JavaScript implementation [here](https://github.com

 An important list of parameters can be changed. But what *should* you change?
 - the `--model` size. List and recommandations [here](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/available_models.md)
- the `--language`.  List [here](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/whisperlivekit/simul_whisper/whisper/tokenizer.py)
+- the `--language`.  List [here](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/whisperlivekit/simul_whisper/whisper/tokenizer.py). If you use `auto`, the model attempts to detect the language automatically, but it tends to be biased towards English.
 - the `--backend` ? you can switch to `--backend faster-whisper` if  `simulstreaming` does not work correctly or if you prefer to avoid the dual-license requirements.
 - `--warmup-file`, if you have one
 - `--host`, `--port`, `--ssl-certfile`, `--ssl-keyfile`, if you set up a server
@@ -159,7 +159,7 @@ The rest I don't recommend. But below are your options.
 | Parameter | Description | Default |
 |-----------|-------------|---------|
 | `--model` | Whisper model size. | `small` |
-| `--language` | Source language code or `auto` | `en` |
+| `--language` | Source language code or `auto` | `auto` |
 | `--task` | `transcribe` or `translate` | `transcribe` |
 | `--backend` | Processing backend | `simulstreaming` |
 | `--min-chunk-size` | Minimum audio chunk size (seconds) | `1.0` |
--- a/whisperlivekit/simul_whisper/backend.py
+++ b/whisperlivekit/simul_whisper/backend.py
@@ -212,7 +212,7 @@ class SimulStreamingASR():
        logger.warning(SIMULSTREAMING_LICENSE)
        self.logfile = logfile
        self.transcribe_kargs = {}
-        self.original_language = None if lan == "auto" else lan
+        self.original_language = lan
        
        self.model_path = kwargs.get('model_path', './large-v3.pt')
        self.frame_threshold = kwargs.get('frame_threshold', 25)
@@ -281,7 +281,7 @@ class SimulStreamingASR():
    def load_model(self):
        whisper_model = load_model(name=self.model_name, download_root=self.model_path)
        warmup_audio = load_file(self.warmup_file)
-        whisper_model.transcribe(warmup_audio, language=self.original_language)
+        whisper_model.transcribe(warmup_audio, language=self.original_language if self.original_language != 'auto' else None)
        return whisper_model
    
    def get_new_model_instance(self):