From 483badf85d0729ec16b690d37448c97968c4952f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Mach=C3=A1=C4=8Dek?= Date: Thu, 23 Nov 2023 07:41:08 +0100 Subject: [PATCH 1/2] Update README.md so many "issues" with question about this :( --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7b18d96..480c8e7 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ The unused one does not have to be installed. We integrate the following segment ## Usage -### Realtime simulation from audio file +### Real-time simulation from audio file ``` usage: whisper_online.py [-h] [--min-chunk-size MIN_CHUNK_SIZE] [--model {tiny.en,tiny,base.en,base,small.en,small,medium.en,medium,large-v1,large-v2,large}] [--model_cache_dir MODEL_CACHE_DIR] [--model_dir MODEL_DIR] [--lan LAN] [--task {transcribe,translate}] @@ -149,7 +149,7 @@ print(o) # do something with the last output online.init() # refresh if you're going to re-use the object for the next audio ``` -### Server +### Server -- real-time from mic `whisper_online_server.py` has the same model options as `whisper_online.py`, plus `--host` and `--port` of the TCP connection. See help message (`-h` option). From 878f11cdb74266b5c3c0e3f2bdcc07358d2f0ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Mach=C3=A1=C4=8Dek?= Date: Sun, 26 Nov 2023 16:11:42 +0100 Subject: [PATCH 2/2] create_tokenizer in documentation #25 --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 480c8e7..265892c 100644 --- a/README.md +++ b/README.md @@ -126,14 +126,14 @@ from whisper_online import * src_lan = "en" # source language tgt_lan = "en" # target language -- same as source for ASR, "en" if translate task is used - asr = FasterWhisperASR(lan, "large-v2") # loads and wraps Whisper model # set options: # asr.set_translate_task() # it will translate from lan into English -# asr.use_vad() # set using VAD +# asr.use_vad() # set using VAD +tokenizer = create_tokenizer(tgt_lan) # sentence segmenter for the target language -online = OnlineASRProcessor(tgt_lan, asr) # create processing object +online = OnlineASRProcessor(asr, tokenizer) # create processing object while audio_has_not_ended: # processing loop: