Quentin Fuxa
2025-07-31 11:42:06 +02:00
parent 56114d3071
commit 46efbdf1d9
5 changed files with 26 additions and 30 deletions

View File

@@ -34,7 +34,6 @@ setup(
},
package_data={
'whisperlivekit': ['web/*.html'],
'whisperlivekit.simul_whisper': ['dual_license_simulstreaming.md'],
'whisperlivekit.simul_whisper.whisper.assets': ['*.tiktoken', '*.npz'],
},
entry_points={

View File

@@ -1,25 +0,0 @@
📄 SimulStreaming (https://github.com/ufal/SimulStreaming) Licence
SimulStreaming is dual-licensed:
🔹 Non-Commercial Use
You may use SimulStreaming under the **PolyForm Noncommercial License 1.0.0** if you
obtain the code through the GitHub repository. This license is **free of charge**
and comes with **no obligations** for non-commercial users.
🔸 Commercial Use
Understanding who uses SimulStreaming commercially helps us improve and
prioritize development. Therefore, we want to **require registration** of those who acquire a commercial licence.
We plan to make the commercial licences **affordable** to SMEs and individuals. We
are considering providing commercial licenses either for free or for a symbolic
one-time fee, and maybe also providing additional support. You can share your preference via the [questionnaire](https://forms.cloud.microsoft/e/7tCxb4gJfB).
You can also leave your contact [there](https://forms.cloud.microsoft/e/7tCxb4gJfB) to be notified when the commercial licenses become
available.
✉️ Contact
[Dominik Macháček](https://ufal.mff.cuni.cz/dominik-machacek/), machacek@ufal.mff.cuni.cz

View File

@@ -0,0 +1,18 @@
# Dual-licence notice for SimulStreaming, framed above and below by a rule of
# 80 asterisks. The text starts and ends with a newline so that it stands
# apart from surrounding output when emitted as a single message.
SIMULSTREAMING_LICENSE = "\n".join(
    (
        "",  # yields the leading newline
        "*" * 80,
        "SimulStreaming (https://github.com/ufal/SimulStreaming) is dual-licensed:",
        "🔹 Non-Commercial Use",
        "You may use SimulStreaming under the PolyForm Noncommercial License 1.0.0 if you obtain the code through the GitHub repository. This license is free of charge and comes with no obligations for non-commercial users.",
        "🔸 Commercial Use",
        "Understanding who uses SimulStreaming commercially helps us improve and",
        "prioritize development. Therefore, we want to require registration of those who acquire a commercial licence.",
        "We plan to make the commercial licenceses affordable to SMEs and individuals. We are considering to provide commercial licenses either for free or for symbolic one-time fee, and maybe also provide additional support. You can share your preference via the questionnaire https://forms.cloud.microsoft/e/7tCxb4gJfB.",
        "You can also leave your contact there: https://forms.cloud.microsoft/e/7tCxb4gJfB to be notified when the commercial licenses become",
        "available.",
        "✉️ Contact",
        "Dominik Macháček (https://ufal.mff.cuni.cz/dominik-machacek/), machacek@ufal.mff.cuni.cz",
        "*" * 80,
        "",  # yields the trailing newline
    )
)

View File

@@ -4,6 +4,7 @@ import os
import urllib
import warnings
from typing import List, Optional, Union
import logging
import torch
from tqdm import tqdm
@@ -14,6 +15,8 @@ from .model import ModelDimensions, Whisper
from .transcribe import transcribe
from .version import __version__
logger = logging.getLogger(__name__)
_MODELS = {
"tiny.en": "https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt",
"tiny": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt",
@@ -71,6 +74,7 @@ def _download(url: str, root: str, in_memory: bool) -> Union[bytes, str]:
)
with urllib.request.urlopen(url) as source, open(download_target, "wb") as output:
logger.info(f'Downloading model weights to {download_target}')
with tqdm(
total=int(source.info().get("Content-Length")),
ncols=80,

View File

@@ -10,7 +10,7 @@ except ImportError:
from typing import List
import numpy as np
from whisperlivekit.timed_objects import ASRToken
from whisperlivekit.simul_whisper.license_simulstreaming import SIMULSTREAMING_LICENSE
logger = logging.getLogger(__name__)
SIMULSTREAMING_ERROR_AND_INSTALLATION_INSTRUCTIONS = ImportError(
"""SimulStreaming dependencies are not available.
@@ -319,8 +319,7 @@ class SimulStreamingASR(ASRBase):
def __init__(self, lan, modelsize=None, cache_dir=None, model_dir=None, logfile=sys.stderr, **kwargs):
if not SIMULSTREAMING_AVAILABLE:
raise SIMULSTREAMING_ERROR_AND_INSTALLATION_INSTRUCTIONS
with open("whisperlivekit/simul_whisper/dual_license_simulstreaming.md", "r") as f:
print("*"*80 + f.read() + "*"*80)
logger.warning(SIMULSTREAMING_LICENSE)
self.logfile = logfile
self.transcribe_kargs = {}
self.original_language = None if lan == "auto" else lan
@@ -482,9 +481,10 @@ class SimulStreamingASR(ASRBase):
try:
if isinstance(audio, np.ndarray):
audio = torch.from_numpy(audio).float()
print(audio)
self.model.insert_audio(audio)
self.model.infer(True)
self.model.refresh_segment(complete=True)
logger.info("SimulStreaming model warmed up successfully")
except Exception as e:
logger.warning(f"SimulStreaming warmup failed: {e}")
logger.exception(f"SimulStreaming warmup failed: {e}")