From b42d8b2692ef404184b35a2003df2031e113719c Mon Sep 17 00:00:00 2001
From: Quentin Fuxa <quentin.fuxa@gmail.com>
Date: Fri, 27 Jun 2025 10:00:19 +0200
Subject: [PATCH] add dual license warning indication when using simulstreaming
 backend

---
 README.md                                     |  6 ++---
 whisperlivekit/parse_args.py                  |  4 +--
 .../dual_license_simulstreaming.md            | 27 +++++++++++++++++++
 .../whisper_streaming_custom/backends.py      |  3 ++-
 .../whisper_online.py                         |  6 ++---
 5 files changed, 37 insertions(+), 9 deletions(-)
 create mode 100644 whisperlivekit/simul_whisper/dual_license_simulstreaming.md

diff --git a/README.md b/README.md
index 6eba003..30e21a6 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@
 
 ## 🚀 Overview
 
-This project is based on [Whisper Streaming](https://github.com/ufal/whisper_streaming) and [SimulStreaming](https://github.com/ufal/SimulStreaming), allowing you to transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with a functional, simple and customizable frontend. Everything runs locally on your machine ✨
+This project is based on [WhisperStreaming](https://github.com/ufal/whisper_streaming) and [SimulStreaming](https://github.com/ufal/SimulStreaming), allowing you to transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with a functional, simple and customizable frontend. Everything runs locally on your machine ✨
 
 ### 🔄 Architecture
 
@@ -35,7 +35,7 @@ WhisperLiveKit consists of three main components:
 - **✅ Confidence Validation** – Immediately validate high-confidence tokens for faster inference (WhisperStreaming only)
 - **👁️ Buffering Preview** – Displays unvalidated transcription segments (not compatible with SimulStreaming yet)
 - **✒️ Punctuation-Based Speaker Splitting [BETA]** - Align speaker changes with natural sentence boundaries for more readable transcripts
-- **⚡ SimulStreaming Backend** - Ultra-low latency transcription using state-of-the-art AlignAtt policy. To use, please copy [simul_whisper](https://github.com/ufal/SimulStreaming/tree/main/simul_whisper) content into `whisperlivekit/simul_whisper` . You must comply with the [Polyform license](https://github.com/ufal/SimulStreaming/blob/main/LICENCE.txt) !!
+- **⚡ SimulStreaming Backend** - Ultra-low latency transcription using state-of-the-art AlignAtt policy. The code is not directly included in the repo: to use it, please copy the contents of [simul_whisper](https://github.com/ufal/SimulStreaming/tree/main/simul_whisper) into `whisperlivekit/simul_whisper`. ⚠️ You must comply with the [Polyform license](https://github.com/ufal/SimulStreaming/blob/main/LICENCE.txt).
 
 
 ## 📖 Quick Start
@@ -132,7 +132,7 @@ whisperlivekit-server --model tiny.en
 whisperlivekit-server --host 0.0.0.0 --port 8000 --model medium --diarization --language auto
 
 # SimulStreaming backend for ultra-low latency
-whisperlivekit-server --backend simulstreaming-whisper --model large-v3 --frame-threshold 20
+whisperlivekit-server --backend simulstreaming --model large-v3 --frame-threshold 20
 ```
 
 
diff --git a/whisperlivekit/parse_args.py b/whisperlivekit/parse_args.py
index b031ec0..f30a6df 100644
--- a/whisperlivekit/parse_args.py
+++ b/whisperlivekit/parse_args.py
@@ -108,7 +108,7 @@ def parse_args():
         "--backend",
         type=str,
         default="faster-whisper",
-        choices=["faster-whisper", "whisper_timestamped", "mlx-whisper", "openai-api", "simulstreaming-whisper"],
+        choices=["faster-whisper", "whisper_timestamped", "mlx-whisper", "openai-api", "simulstreaming"],
         help="Load only this backend for Whisper processing.",
     )
     parser.add_argument(
@@ -152,7 +152,7 @@ def parse_args():
     parser.add_argument("--ssl-keyfile", type=str, help="Path to the SSL private key file.", default=None)
 
     # SimulStreaming-specific arguments
-    simulstreaming_group = parser.add_argument_group('SimulStreaming arguments (only used with --backend simulstreaming-whisper)')
+    simulstreaming_group = parser.add_argument_group('SimulStreaming arguments (only used with --backend simulstreaming)')
     
     simulstreaming_group.add_argument(
         "--frame-threshold",
diff --git a/whisperlivekit/simul_whisper/dual_license_simulstreaming.md b/whisperlivekit/simul_whisper/dual_license_simulstreaming.md
new file mode 100644
index 0000000..e6d4635
--- /dev/null
+++ b/whisperlivekit/simul_whisper/dual_license_simulstreaming.md
@@ -0,0 +1,27 @@
+
+
+📄 SimulStreaming (https://github.com/ufal/SimulStreaming) Licence
+
+SimulStreaming is dual-licensed:
+
+🔹 Non-Commercial Use
+
+You may use SimulStreaming under the **PolyForm Noncommercial License 1.0.0** if you
+obtain the code through the GitHub repository.  This license is **free of charge**
+and comes with **no obligations** for non-commercial users.
+
+🔸 Commercial Use
+
+Understanding who uses SimulStreaming commercially helps us improve and
+prioritize development. Therefore, we want to **require registration** of those who acquire a commercial licence.
+
+We plan to make the commercial licenses **affordable** to SMEs and individuals. We
+are considering providing commercial licenses either for free or for a symbolic
+one-time fee, and maybe also providing additional support. You can share your preference via the [questionnaire](https://forms.cloud.microsoft/e/7tCxb4gJfB).
+
+You can also leave your contact details [there](https://forms.cloud.microsoft/e/7tCxb4gJfB) to be notified when the commercial licenses become
+available.
+
+✉️ Contact
+
+[Dominik Macháček](https://ufal.mff.cuni.cz/dominik-machacek/), machacek@ufal.mff.cuni.cz
diff --git a/whisperlivekit/whisper_streaming_custom/backends.py b/whisperlivekit/whisper_streaming_custom/backends.py
index 32b5044..954c9d4 100644
--- a/whisperlivekit/whisper_streaming_custom/backends.py
+++ b/whisperlivekit/whisper_streaming_custom/backends.py
@@ -316,7 +316,8 @@ class SimulStreamingASR(ASRBase):
     def __init__(self, lan, modelsize=None, cache_dir=None, model_dir=None, logfile=sys.stderr, **kwargs):
         if not SIMULSTREAMING_AVAILABLE:
             raise ImportError("SimulStreaming dependencies are not available. Please install SimulStreaming requirements.")
-        
+        with open("whisperlivekit/simul_whisper/dual_license_simulstreaming.md", "r") as f:
+            print("*"*80 + f.read() + "*"*80)
         self.logfile = logfile
         self.transcribe_kargs = {}
         self.original_language = None if lan == "auto" else lan
diff --git a/whisperlivekit/whisper_streaming_custom/whisper_online.py b/whisperlivekit/whisper_streaming_custom/whisper_online.py
index 8a3869d..4320a75 100644
--- a/whisperlivekit/whisper_streaming_custom/whisper_online.py
+++ b/whisperlivekit/whisper_streaming_custom/whisper_online.py
@@ -69,7 +69,7 @@ def backend_factory(args):
     if backend == "openai-api":
         logger.debug("Using OpenAI API.")
         asr = OpenaiApiASR(lan=args.lan)
-    elif backend == "simulstreaming-whisper":
+    elif backend == "simulstreaming":
         logger.debug("Using SimulStreaming backend.")
         if not SIMULSTREAMING_AVAILABLE:
             raise ImportError(
@@ -128,7 +128,7 @@ def backend_factory(args):
 
     language = args.lan
     if args.task == "translate":
-        if backend != "simulstreaming-whisper":
+        if backend != "simulstreaming":
             asr.set_translate_task()
         tgt_language = "en"  # Whisper translates into English
     else:
@@ -142,7 +142,7 @@ def backend_factory(args):
     return asr, tokenizer
 
 def online_factory(args, asr, tokenizer, logfile=sys.stderr):
-    if args.backend == "simulstreaming-whisper":
+    if args.backend == "simulstreaming":
         if not SIMULSTREAMING_ONLINE_AVAILABLE:
             raise ImportError("SimulStreaming online processor is not available.")