diff --git a/README.md b/README.md
index 0f8aee4..ecb7a56 100644
--- a/README.md
+++ b/README.md
@@ -148,7 +148,7 @@ The package includes an HTML/JavaScript implementation [here](https://github.com
 
 An important list of parameters can be changed. But what *should* you change?
 - the `--model` size. List and recommandations [here](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/available_models.md)
-- the `--language`. List [here](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/whisperlivekit/simul_whisper/whisper/tokenizer.py)
+- the `--language`. List [here](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/whisperlivekit/simul_whisper/whisper/tokenizer.py). If you use `auto`, the model attempts to detect the language automatically, but it tends to bias towards English.
 - the `--backend` ? you can switch to `--backend faster-whisper` if `simulstreaming` does not work correctly or if you prefer to avoid the dual-license requirements.
 - `--warmup-file`, if you have one
 - `--host`, `--port`, `--ssl-certfile`, `--ssl-keyfile`, if you set up a server
@@ -159,7 +159,7 @@ The rest I don't recommend. But below are your options.
 | Parameter | Description | Default |
 |-----------|-------------|---------|
 | `--model` | Whisper model size. | `small` |
-| `--language` | Source language code or `auto` | `en` |
+| `--language` | Source language code or `auto` | `auto` |
 | `--task` | `transcribe` or `translate` | `transcribe` |
 | `--backend` | Processing backend | `simulstreaming` |
 | `--min-chunk-size` | Minimum audio chunk size (seconds) | `1.0` |
diff --git a/whisperlivekit/simul_whisper/backend.py b/whisperlivekit/simul_whisper/backend.py
index e38eda9..1764a0a 100644
--- a/whisperlivekit/simul_whisper/backend.py
+++ b/whisperlivekit/simul_whisper/backend.py
@@ -212,7 +212,7 @@ class SimulStreamingASR():
         logger.warning(SIMULSTREAMING_LICENSE)
         self.logfile = logfile
         self.transcribe_kargs = {}
-        self.original_language = None if lan == "auto" else lan
+        self.original_language = lan
 
         self.model_path = kwargs.get('model_path', './large-v3.pt')
         self.frame_threshold = kwargs.get('frame_threshold', 25)
@@ -281,7 +281,7 @@ class SimulStreamingASR():
     def load_model(self):
         whisper_model = load_model(name=self.model_name, download_root=self.model_path)
         warmup_audio = load_file(self.warmup_file)
-        whisper_model.transcribe(warmup_audio, language=self.original_language)
+        whisper_model.transcribe(warmup_audio, language=self.original_language if self.original_language != 'auto' else None)
         return whisper_model
 
     def get_new_model_instance(self):