diff --git a/README.md b/README.md index 451c82f..f570eb7 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,6 @@ Go to `chrome-extension` for instructions. | **Apple Silicon optimizations** | `mlx-whisper` | | **Translation** | `nllw` | | *[Not recommanded]* Speaker diarization with Diart | `diart` | -| *[Not recommanded]* Original Whisper backend | `whisper` | | *[Not recommanded]* Improved timestamps backend | `whisper-timestamped` | | OpenAI API backend | `openai` | diff --git a/pyproject.toml b/pyproject.toml index e0bb146..0ed41ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,15 +54,15 @@ packages = [ "whisperlivekit", "whisperlivekit.diarization", "whisperlivekit.simul_whisper", - "whisperlivekit.simul_whisper.whisper", - "whisperlivekit.simul_whisper.whisper.assets", - "whisperlivekit.simul_whisper.whisper.normalizers", + "whisperlivekit.whisper", + "whisperlivekit.whisper.assets", + "whisperlivekit.whisper.normalizers", "whisperlivekit.web", - "whisperlivekit.whisper_streaming_custom", + "whisperlivekit.local_agreement", "whisperlivekit.vad_models" ] [tool.setuptools.package-data] whisperlivekit = ["web/*.html", "web/*.css", "web/*.js", "web/src/*.svg"] -"whisperlivekit.simul_whisper.whisper.assets" = ["*.tiktoken", "*.npz"] +"whisperlivekit.whisper.assets" = ["*.tiktoken", "*.npz"] "whisperlivekit.vad_models" = ["*.jit", "*.onnx"] diff --git a/whisperlivekit/core.py b/whisperlivekit/core.py index d25bb86..3cdbbf1 100644 --- a/whisperlivekit/core.py +++ b/whisperlivekit/core.py @@ -1,9 +1,6 @@ -try: - from whisperlivekit.whisper_streaming_custom.whisper_online import backend_factory - from whisperlivekit.whisper_streaming_custom.online_asr import OnlineASRProcessor -except ImportError: - from .whisper_streaming_custom.whisper_online import backend_factory - from .whisper_streaming_custom.online_asr import OnlineASRProcessor +from whisperlivekit.local_agreement.whisper_online import backend_factory +from whisperlivekit.simul_whisper import SimulStreamingASR +from whisperlivekit.local_agreement.online_asr import OnlineASRProcessor from argparse import Namespace import sys @@ -82,9 +79,7 @@ class TranscriptionEngine: self.vac_model = load_silero_vad(onnx=use_onnx) if self.args.transcription: - if self.args.backend == "simulstreaming": - from whisperlivekit.simul_whisper import SimulStreamingASR - + if self.args.backend == "simulstreaming": simulstreaming_params = { "disable_fast_encoder": False, "custom_alignment_heads": None, diff --git a/whisperlivekit/simul_whisper/whisper/assets/__init__.py b/whisperlivekit/local_agreement/__init__.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/assets/__init__.py rename to whisperlivekit/local_agreement/__init__.py diff --git a/whisperlivekit/whisper_streaming_custom/backends.py b/whisperlivekit/local_agreement/backends.py similarity index 100% rename from whisperlivekit/whisper_streaming_custom/backends.py rename to whisperlivekit/local_agreement/backends.py diff --git a/whisperlivekit/whisper_streaming_custom/online_asr.py b/whisperlivekit/local_agreement/online_asr.py similarity index 100% rename from whisperlivekit/whisper_streaming_custom/online_asr.py rename to whisperlivekit/local_agreement/online_asr.py diff --git a/whisperlivekit/whisper_streaming_custom/whisper_online.py b/whisperlivekit/local_agreement/whisper_online.py similarity index 100% rename from whisperlivekit/whisper_streaming_custom/whisper_online.py rename to whisperlivekit/local_agreement/whisper_online.py diff --git a/whisperlivekit/simul_whisper/backend.py b/whisperlivekit/simul_whisper/backend.py index 04b4705..0e4a4f2 100644 --- a/whisperlivekit/simul_whisper/backend.py +++ b/whisperlivekit/simul_whisper/backend.py @@ -6,17 +6,18 @@ import logging import platform from whisperlivekit.timed_objects import ASRToken, Transcript, ChangeSpeaker from whisperlivekit.warmup import load_file -from .whisper import load_model, tokenizer -from .whisper.audio import TOKENS_PER_SECOND +from whisperlivekit.whisper import load_model, tokenizer +from whisperlivekit.whisper.audio import TOKENS_PER_SECOND import os import gc from pathlib import Path -logger = logging.getLogger(__name__) import torch from whisperlivekit.simul_whisper.config import AlignAttConfig from whisperlivekit.simul_whisper.simul_whisper import PaddedAlignAttWhisper -from whisperlivekit.simul_whisper.whisper import tokenizer + +logger = logging.getLogger(__name__) + try: from .mlx_encoder import mlx_model_mapping, load_mlx_encoder diff --git a/whisperlivekit/simul_whisper/beam.py b/whisperlivekit/simul_whisper/beam.py index c226f76..cf61be7 100644 --- a/whisperlivekit/simul_whisper/beam.py +++ b/whisperlivekit/simul_whisper/beam.py @@ -1,4 +1,4 @@ -from .whisper.decoding import PyTorchInference +from whisperlivekit.whisper.decoding import PyTorchInference # extention of PyTorchInference for beam search class BeamPyTorchInference(PyTorchInference): diff --git a/whisperlivekit/simul_whisper/config.py b/whisperlivekit/simul_whisper/config.py index 08f72c1..2562ce0 100644 --- a/whisperlivekit/simul_whisper/config.py +++ b/whisperlivekit/simul_whisper/config.py @@ -1,5 +1,3 @@ -# This code was originally in simul_whisper/transcriber/simul_whisper.py . It is adapted a lot for SimulStreaming. - from dataclasses import dataclass, field from typing import Literal diff --git a/whisperlivekit/simul_whisper/simul_whisper.py b/whisperlivekit/simul_whisper/simul_whisper.py index 9582a5f..697abec 100644 --- a/whisperlivekit/simul_whisper/simul_whisper.py +++ b/whisperlivekit/simul_whisper/simul_whisper.py @@ -6,12 +6,12 @@ import logging import torch import torch.nn.functional as F -from .whisper import load_model, DecodingOptions, tokenizer +from whisperlivekit.whisper import load_model, DecodingOptions, tokenizer from .config import AlignAttConfig from whisperlivekit.timed_objects import ASRToken -from .whisper.audio import log_mel_spectrogram, TOKENS_PER_SECOND, pad_or_trim, N_SAMPLES, N_FRAMES -from .whisper.timing import median_filter -from .whisper.decoding import GreedyDecoder, BeamSearchDecoder, SuppressTokens, detect_language +from whisperlivekit.whisper.audio import log_mel_spectrogram, TOKENS_PER_SECOND, pad_or_trim, N_SAMPLES, N_FRAMES +from whisperlivekit.whisper.timing import median_filter +from whisperlivekit.whisper.decoding import GreedyDecoder, BeamSearchDecoder, SuppressTokens, detect_language from .beam import BeamPyTorchInference from .eow_detection import fire_at_boundary, load_cif import os diff --git a/whisperlivekit/simul_whisper/whisper/__init__.py b/whisperlivekit/whisper/__init__.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/__init__.py rename to whisperlivekit/whisper/__init__.py diff --git a/whisperlivekit/simul_whisper/whisper/__main__.py b/whisperlivekit/whisper/__main__.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/__main__.py rename to whisperlivekit/whisper/__main__.py diff --git a/whisperlivekit/whisper_streaming_custom/__init__.py b/whisperlivekit/whisper/assets/__init__.py similarity index 100% rename from whisperlivekit/whisper_streaming_custom/__init__.py rename to whisperlivekit/whisper/assets/__init__.py diff --git a/whisperlivekit/simul_whisper/whisper/assets/gpt2.tiktoken b/whisperlivekit/whisper/assets/gpt2.tiktoken similarity index 100% rename from whisperlivekit/simul_whisper/whisper/assets/gpt2.tiktoken rename to whisperlivekit/whisper/assets/gpt2.tiktoken diff --git a/whisperlivekit/simul_whisper/whisper/assets/mel_filters.npz b/whisperlivekit/whisper/assets/mel_filters.npz similarity index 100% rename from whisperlivekit/simul_whisper/whisper/assets/mel_filters.npz rename to whisperlivekit/whisper/assets/mel_filters.npz diff --git a/whisperlivekit/simul_whisper/whisper/assets/multilingual.tiktoken b/whisperlivekit/whisper/assets/multilingual.tiktoken similarity index 100% rename from whisperlivekit/simul_whisper/whisper/assets/multilingual.tiktoken rename to whisperlivekit/whisper/assets/multilingual.tiktoken diff --git a/whisperlivekit/simul_whisper/whisper/audio.py b/whisperlivekit/whisper/audio.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/audio.py rename to whisperlivekit/whisper/audio.py diff --git a/whisperlivekit/simul_whisper/whisper/decoding.py b/whisperlivekit/whisper/decoding.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/decoding.py rename to whisperlivekit/whisper/decoding.py diff --git a/whisperlivekit/simul_whisper/whisper/model.py b/whisperlivekit/whisper/model.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/model.py rename to whisperlivekit/whisper/model.py diff --git a/whisperlivekit/simul_whisper/whisper/normalizers/__init__.py b/whisperlivekit/whisper/normalizers/__init__.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/normalizers/__init__.py rename to whisperlivekit/whisper/normalizers/__init__.py diff --git a/whisperlivekit/simul_whisper/whisper/normalizers/basic.py b/whisperlivekit/whisper/normalizers/basic.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/normalizers/basic.py rename to whisperlivekit/whisper/normalizers/basic.py diff --git a/whisperlivekit/simul_whisper/whisper/normalizers/english.json b/whisperlivekit/whisper/normalizers/english.json similarity index 100% rename from whisperlivekit/simul_whisper/whisper/normalizers/english.json rename to whisperlivekit/whisper/normalizers/english.json diff --git a/whisperlivekit/simul_whisper/whisper/normalizers/english.py b/whisperlivekit/whisper/normalizers/english.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/normalizers/english.py rename to whisperlivekit/whisper/normalizers/english.py diff --git a/whisperlivekit/simul_whisper/whisper/timing.py b/whisperlivekit/whisper/timing.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/timing.py rename to whisperlivekit/whisper/timing.py diff --git a/whisperlivekit/simul_whisper/whisper/tokenizer.py b/whisperlivekit/whisper/tokenizer.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/tokenizer.py rename to whisperlivekit/whisper/tokenizer.py diff --git a/whisperlivekit/simul_whisper/whisper/transcribe.py b/whisperlivekit/whisper/transcribe.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/transcribe.py rename to whisperlivekit/whisper/transcribe.py diff --git a/whisperlivekit/simul_whisper/whisper/triton_ops.py b/whisperlivekit/whisper/triton_ops.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/triton_ops.py rename to whisperlivekit/whisper/triton_ops.py diff --git a/whisperlivekit/simul_whisper/whisper/utils.py b/whisperlivekit/whisper/utils.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/utils.py rename to whisperlivekit/whisper/utils.py diff --git a/whisperlivekit/simul_whisper/whisper/version.py b/whisperlivekit/whisper/version.py similarity index 100% rename from whisperlivekit/simul_whisper/whisper/version.py rename to whisperlivekit/whisper/version.py