From b42d8b2692ef404184b35a2003df2031e113719c Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Fri, 27 Jun 2025 10:00:19 +0200 Subject: [PATCH] add dual license warning indication when using simulstreaming backend --- README.md | 6 ++--- whisperlivekit/parse_args.py | 4 +-- .../dual_license_simulstreaming.md | 27 +++++++++++++++++++ .../whisper_streaming_custom/backends.py | 3 ++- .../whisper_online.py | 6 ++--- 5 files changed, 37 insertions(+), 9 deletions(-) create mode 100644 whisperlivekit/simul_whisper/dual_license_simulstreaming.md diff --git a/README.md b/README.md index 6eba003..30e21a6 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ ## 🚀 Overview -This project is based on [Whisper Streaming](https://github.com/ufal/whisper_streaming) and [SimulStreaming](https://github.com/ufal/SimulStreaming), allowing you to transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with a functional, simple and customizable frontend. Everything runs locally on your machine ✨ +This project is based on [WhisperStreaming](https://github.com/ufal/whisper_streaming) and [SimulStreaming](https://github.com/ufal/SimulStreaming), allowing you to transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with a functional, simple and customizable frontend. 
Everything runs locally on your machine ✨ ### 🔄 Architecture @@ -35,7 +35,7 @@ WhisperLiveKit consists of three main components: - **✅ Confidence Validation** – Immediately validate high-confidence tokens for faster inference (WhisperStreaming only) - **👁️ Buffering Preview** – Displays unvalidated transcription segments (not compatible with SimulStreaming yet) - **✒️ Punctuation-Based Speaker Splitting [BETA]** - Align speaker changes with natural sentence boundaries for more readable transcripts -- **⚡ SimulStreaming Backend** - Ultra-low latency transcription using state-of-the-art AlignAtt policy. To use, please copy [simul_whisper](https://github.com/ufal/SimulStreaming/tree/main/simul_whisper) content into `whisperlivekit/simul_whisper` . You must comply with the [Polyform license](https://github.com/ufal/SimulStreaming/blob/main/LICENCE.txt) !! +- **⚡ SimulStreaming Backend** - Ultra-low latency transcription using state-of-the-art AlignAtt policy. The code is not directly included in the repo: to use it, please copy the [simul_whisper](https://github.com/ufal/SimulStreaming/tree/main/simul_whisper) content into `whisperlivekit/simul_whisper`. 
⚠️ You must comply with the [Polyform license](https://github.com/ufal/SimulStreaming/blob/main/LICENCE.txt) ## 📖 Quick Start @@ -132,7 +132,7 @@ whisperlivekit-server --model tiny.en whisperlivekit-server --host 0.0.0.0 --port 8000 --model medium --diarization --language auto # SimulStreaming backend for ultra-low latency -whisperlivekit-server --backend simulstreaming-whisper --model large-v3 --frame-threshold 20 +whisperlivekit-server --backend simulstreaming --model large-v3 --frame-threshold 20 ``` diff --git a/whisperlivekit/parse_args.py b/whisperlivekit/parse_args.py index b031ec0..f30a6df 100644 --- a/whisperlivekit/parse_args.py +++ b/whisperlivekit/parse_args.py @@ -108,7 +108,7 @@ def parse_args(): "--backend", type=str, default="faster-whisper", - choices=["faster-whisper", "whisper_timestamped", "mlx-whisper", "openai-api", "simulstreaming-whisper"], + choices=["faster-whisper", "whisper_timestamped", "mlx-whisper", "openai-api", "simulstreaming"], help="Load only this backend for Whisper processing.", ) parser.add_argument( @@ -152,7 +152,7 @@ def parse_args(): parser.add_argument("--ssl-keyfile", type=str, help="Path to the SSL private key file.", default=None) # SimulStreaming-specific arguments - simulstreaming_group = parser.add_argument_group('SimulStreaming arguments (only used with --backend simulstreaming-whisper)') + simulstreaming_group = parser.add_argument_group('SimulStreaming arguments (only used with --backend simulstreaming)') simulstreaming_group.add_argument( "--frame-threshold", diff --git a/whisperlivekit/simul_whisper/dual_license_simulstreaming.md b/whisperlivekit/simul_whisper/dual_license_simulstreaming.md new file mode 100644 index 0000000..e6d4635 --- /dev/null +++ b/whisperlivekit/simul_whisper/dual_license_simulstreaming.md @@ -0,0 +1,27 @@ + + +📄 SimulStreaming (https://github.com/ufal/SimulStreaming) Licence + +SimulStreaming is dual-licensed: + +🔹 Non-Commercial Use + +You may use SimulStreaming under the 
**PolyForm Noncommercial License 1.0.0** if you +obtain the code through the GitHub repository. This license is **free of charge** +and comes with **no obligations** for non-commercial users. + +🔸 Commercial Use + +Understanding who uses SimulStreaming commercially helps us improve and +prioritize development. Therefore, we want to **require registration** of those who acquire a commercial licence. + +We plan to make the commercial licenses **affordable** to SMEs and individuals. We +are considering providing commercial licenses either for free or for a symbolic +one-time fee, and maybe also providing additional support. You can share your preference via the [questionnaire](https://forms.cloud.microsoft/e/7tCxb4gJfB). + +You can also leave your contact details [there](https://forms.cloud.microsoft/e/7tCxb4gJfB) to be notified when the commercial licenses become +available. + +✉️ Contact + +[Dominik Macháček](https://ufal.mff.cuni.cz/dominik-machacek/), machacek@ufal.mff.cuni.cz diff --git a/whisperlivekit/whisper_streaming_custom/backends.py b/whisperlivekit/whisper_streaming_custom/backends.py index 32b5044..954c9d4 100644 --- a/whisperlivekit/whisper_streaming_custom/backends.py +++ b/whisperlivekit/whisper_streaming_custom/backends.py @@ -316,7 +316,8 @@ class SimulStreamingASR(ASRBase): def __init__(self, lan, modelsize=None, cache_dir=None, model_dir=None, logfile=sys.stderr, **kwargs): if not SIMULSTREAMING_AVAILABLE: raise ImportError("SimulStreaming dependencies are not available. 
Please install SimulStreaming requirements.") - + with open("whisperlivekit/simul_whisper/dual_license_simulstreaming.md", "r") as f: + print("*"*80 + f.read() + "*"*80) self.logfile = logfile self.transcribe_kargs = {} self.original_language = None if lan == "auto" else lan diff --git a/whisperlivekit/whisper_streaming_custom/whisper_online.py b/whisperlivekit/whisper_streaming_custom/whisper_online.py index 8a3869d..4320a75 100644 --- a/whisperlivekit/whisper_streaming_custom/whisper_online.py +++ b/whisperlivekit/whisper_streaming_custom/whisper_online.py @@ -69,7 +69,7 @@ def backend_factory(args): if backend == "openai-api": logger.debug("Using OpenAI API.") asr = OpenaiApiASR(lan=args.lan) - elif backend == "simulstreaming-whisper": + elif backend == "simulstreaming": logger.debug("Using SimulStreaming backend.") if not SIMULSTREAMING_AVAILABLE: raise ImportError( @@ -128,7 +128,7 @@ def backend_factory(args): language = args.lan if args.task == "translate": - if backend != "simulstreaming-whisper": + if backend != "simulstreaming": asr.set_translate_task() tgt_language = "en" # Whisper translates into English else: @@ -142,7 +142,7 @@ def backend_factory(args): return asr, tokenizer def online_factory(args, asr, tokenizer, logfile=sys.stderr): - if args.backend == "simulstreaming-whisper": + if args.backend == "simulstreaming": if not SIMULSTREAMING_ONLINE_AVAILABLE: raise ImportError("SimulStreaming online processor is not available.")