solve #100

2026-04-24 15:21:41 +00:00 · 2025-03-24 20:38:47 +01:00
parent ccf99cecdf
commit e9022894b2
4 changed files with 18 additions and 6 deletions
--- a/README.md
+++ b/README.md
@@ -72,6 +72,12 @@ pip install tokenize_uk  # If you work with Ukrainian text

 # If you want to use diarization
 pip install diart
+
+# Optional backends (the default is faster-whisper); quotes keep zsh from globbing [...]
+pip install "whisperlivekit[whisper]"              # Original Whisper backend
+pip install "whisperlivekit[whisper-timestamped]"  # Whisper with improved timestamps
+pip install "whisperlivekit[mlx-whisper]"          # Optimized for Apple Silicon
+pip install "whisperlivekit[openai]"               # OpenAI API backend
 ```

 ### Get access to 🎹 pyannote models
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,7 @@
 from setuptools import setup, find_packages
-
 setup(
    name="whisperlivekit",
-    version="0.1.2",
+    version="0.1.3",
    description="Real-time, Fully Local Whisper's Speech-to-Text and Speaker Diarization",
    long_description=open("README.md", "r", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
@@ -22,6 +21,10 @@ setup(
        "diarization": ["diart"],
        "vac": ["torch"],
        "sentence": ["mosestokenizer", "wtpsplit"],
+        "whisper": ["whisper"],
+        "whisper-timestamped": ["whisper-timestamped"],
+        "mlx-whisper": ["mlx-whisper"],
+        "openai": ["openai"],
    },
    package_data={
        'whisperlivekit': ['web/*.html'],
--- a/whisperlivekit/core.py
+++ b/whisperlivekit/core.py
@@ -1,7 +1,7 @@
 try:
    from whisperlivekit.whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
-except:
-    from whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
+except ImportError:
+    from .whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
 from argparse import Namespace, ArgumentParser

 def parse_args():
--- a/whisperlivekit/whisper_streaming_custom/backends.py
+++ b/whisperlivekit/whisper_streaming_custom/backends.py
@@ -3,7 +3,10 @@ import logging
 import io
 import soundfile as sf
 import math
-import torch
+try: 
+    import torch
+except ImportError: 
+    torch = None
 from typing import List
 import numpy as np
 from whisperlivekit.timed_objects import ASRToken
@@ -102,7 +105,7 @@ class FasterWhisperASR(ASRBase):
            model_size_or_path = modelsize
        else:
            raise ValueError("Either modelsize or model_dir must be set")
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = "cuda" if torch and torch.cuda.is_available() else "cpu"
        compute_type = "float16" if device == "cuda" else "float32"

        model = WhisperModel(