mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-07 14:23:18 +00:00
Merge branch 'main' into ayo-logging-fixes
This commit is contained in:
@@ -183,7 +183,7 @@ online.init() # refresh if you're going to re-use the object for the next audio
|
||||
|
||||
### Server -- real-time from mic
|
||||
|
||||
`whisper_online_server.py` has the same model options as `whisper_online.py`, plus `--host` and `--port` of the TCP connection. See help message (`-h` option).
|
||||
`whisper_online_server.py` has the same model options as `whisper_online.py`, plus `--host` and `--port` for the TCP connection and `--warmup-file` for the path to a speech audio file used to warm up Whisper. See the help message (`-h` option).
|
||||
|
||||
Client example:
|
||||
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
|
||||
"""Functions for sending and receiving individual lines of text over a socket.
|
||||
|
||||
Used by marian-server-server.py to communicate with the Marian worker.
|
||||
|
||||
A line is transmitted using one or more fixed-size packets of UTF-8 bytes
|
||||
containing:
|
||||
|
||||
@@ -11,6 +9,7 @@ containing:
|
||||
|
||||
- Zero or more \0 bytes as required to pad the packet to PACKET_SIZE
|
||||
|
||||
Originally from the UEDIN team of the ELITR project.
|
||||
"""
|
||||
|
||||
PACKET_SIZE = 65536
|
||||
|
||||
@@ -559,7 +559,7 @@ def add_shared_args(parser):
|
||||
|
||||
def asr_factory(args, logfile=sys.stderr):
|
||||
"""
|
||||
Creates and configures an ASR instance based on the specified backend and arguments.
|
||||
Creates and configures an ASR and ASR Online instance based on the specified backend and arguments.
|
||||
"""
|
||||
backend = args.backend
|
||||
if backend == "openai-api":
|
||||
@@ -584,8 +584,23 @@ def asr_factory(args, logfile=sys.stderr):
|
||||
logging.info("Setting VAD filter")
|
||||
asr.use_vad()
|
||||
|
||||
return asr
|
||||
language = args.lan
|
||||
if args.task == "translate":
|
||||
asr.set_translate_task()
|
||||
tgt_language = "en" # Whisper translates into English
|
||||
else:
|
||||
tgt_language = language # Whisper transcribes in this language
|
||||
|
||||
# Create the tokenizer
|
||||
if args.buffer_trimming == "sentence":
|
||||
tokenizer = create_tokenizer(tgt_language)
|
||||
else:
|
||||
tokenizer = None
|
||||
|
||||
# Create the OnlineASRProcessor
|
||||
online = OnlineASRProcessor(asr,tokenizer,logfile=logfile,buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec))
|
||||
|
||||
return asr, online
|
||||
## main:
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -613,27 +628,13 @@ if __name__ == "__main__":
|
||||
duration = len(load_audio(audio_path))/SAMPLING_RATE
|
||||
logging.info("Audio duration is: %2.2f seconds" % duration)
|
||||
|
||||
asr = asr_factory(args, logfile=logfile)
|
||||
language = args.lan
|
||||
|
||||
if args.task == "translate":
|
||||
asr.set_translate_task()
|
||||
tgt_language = "en" # Whisper translates into English
|
||||
else:
|
||||
tgt_language = language # Whisper transcribes in this language
|
||||
|
||||
asr, online = asr_factory(args, logfile=logfile)
|
||||
min_chunk = args.min_chunk_size
|
||||
if args.buffer_trimming == "sentence":
|
||||
tokenizer = create_tokenizer(tgt_language)
|
||||
else:
|
||||
tokenizer = None
|
||||
online = OnlineASRProcessor(asr,tokenizer,logfile=logfile,buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec))
|
||||
|
||||
|
||||
# load the audio into the LRU cache before we start the timer
|
||||
a = load_audio_chunk(audio_path,0,1)
|
||||
|
||||
# warm up the ASR, because the very first transcribe takes much more time than the other
|
||||
# warm up the ASR because the very first transcribe takes much more time than the other
|
||||
asr.transcribe(a)
|
||||
|
||||
beg = args.start_at
|
||||
|
||||
@@ -12,6 +12,8 @@ parser = argparse.ArgumentParser()
|
||||
# server options
|
||||
parser.add_argument("--host", type=str, default='localhost')
|
||||
parser.add_argument("--port", type=int, default=43007)
|
||||
parser.add_argument("--warmup-file", type=str, dest="warmup_file",
|
||||
help="The path to a speech audio wav file to warm up Whisper so that the very first chunk processing is fast. It can be e.g. https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav .")
|
||||
|
||||
parser.add_argument("-l", "--log-level", dest="log_level",
|
||||
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
|
||||
@@ -33,37 +35,28 @@ SAMPLING_RATE = 16000
|
||||
|
||||
size = args.model
|
||||
language = args.lan
|
||||
|
||||
asr = asr_factory(args)
|
||||
|
||||
if args.task == "translate":
|
||||
asr.set_translate_task()
|
||||
tgt_language = "en"
|
||||
else:
|
||||
tgt_language = language
|
||||
|
||||
asr, online = asr_factory(args)
|
||||
min_chunk = args.min_chunk_size
|
||||
|
||||
|
||||
if args.buffer_trimming == "sentence":
|
||||
tokenizer = create_tokenizer(tgt_language)
|
||||
else:
|
||||
tokenizer = None
|
||||
online = OnlineASRProcessor(asr,tokenizer,buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec))
|
||||
|
||||
|
||||
|
||||
demo_audio_path = "cs-maji-2.16k.wav"
|
||||
if os.path.exists(demo_audio_path):
|
||||
# load the audio into the LRU cache before we start the timer
|
||||
logging.debug(f"Warming up on {demo_audio_path}")
|
||||
a = load_audio_chunk(demo_audio_path,0,1)
|
||||
|
||||
# TODO: it should be tested whether it's meaningful
|
||||
# warm up the ASR, because the very first transcribe takes much more time than the other
|
||||
asr.transcribe(a)
|
||||
logging.debug("Whisper is warmed up")
|
||||
# warm up the ASR because the very first transcribe takes more time than the others.
|
||||
# Test results in https://github.com/ufal/whisper_streaming/pull/81
|
||||
msg = "Whisper is not warmed up. The first chunk processing may take longer."
|
||||
if args.warmup_file:
|
||||
if os.path.isfile(args.warmup_file):
|
||||
a = load_audio_chunk(args.warmup_file,0,1)
|
||||
asr.transcribe(a)
|
||||
print("INFO: Whisper is warmed up.",file=sys.stderr)
|
||||
else:
|
||||
print("WARNING: The warm up file is not available. "+msg,file=sys.stderr)
|
||||
else:
|
||||
logging.debug("Whisper is not warmed up")
|
||||
print("WARNING: " + msg, file=sys.stderr)
|
||||
|
||||
|
||||
######### Server objects
|
||||
|
||||
Reference in New Issue
Block a user