From e42523af84e8c38268ad36f09c46e32322073214 Mon Sep 17 00:00:00 2001 From: Quentin Fuxa Date: Sun, 17 Aug 2025 01:29:34 +0200 Subject: [PATCH] VAC activated by default --- README.md | 6 +----- pyproject.toml | 21 +++++++++------------ 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 7986247..7086561 100644 --- a/README.md +++ b/README.md @@ -78,9 +78,6 @@ brew install ffmpeg ### Optional Dependencies ```bash -# Voice Activity Controller (prevents hallucinations) -pip install torch - # Sentence-based buffer trimming pip install mosestokenizer wtpsplit pip install tokenize_uk # If you work with Ukrainian text @@ -93,7 +90,6 @@ pip install whisperlivekit[whisper] # Original Whisper pip install whisperlivekit[whisper-timestamped] # Improved timestamps pip install whisperlivekit[mlx-whisper] # Apple Silicon optimization pip install whisperlivekit[openai] # OpenAI API -pip install whisperlivekit[simulstreaming] ``` ### 🎹 Pyannote Models Setup @@ -195,7 +191,7 @@ WhisperLiveKit offers extensive configuration options: | `--punctuation-split` | Use punctuation to improve speaker boundaries | `True` | | `--confidence-validation` | Use confidence scores for faster validation | `False` | | `--min-chunk-size` | Minimum audio chunk size (seconds) | `1.0` | -| `--vac` | Use Voice Activity Controller | `False` | +| `--vac` | Use Voice Activity Controller | `True` | | `--no-vad` | Disable Voice Activity Detection | `False` | | `--buffer_trimming` | Buffer trimming strategy (`sentence` or `segment`) | `segment` | | `--warmup-file` | Audio file path for model warmup | `jfk.wav` | diff --git a/pyproject.toml b/pyproject.toml index f5f20f3..e1d4140 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,24 +27,21 @@ dependencies = [ "soundfile", "faster-whisper", "uvicorn", - "websockets" -] - -[project.optional-dependencies] -diarization = ["diart"] -vac = ["torch"] -sentence = ["mosestokenizer", "wtpsplit"] -whisper = ["whisper"] -whisper-timestamped = ["whisper-timestamped"] -mlx-whisper = ["mlx-whisper"] -openai = ["openai"] -simulstreaming = [ + "websockets", "torch", "tqdm", "tiktoken", 'triton>=2.0.0,<3; platform_machine == "x86_64" and (sys_platform == "linux" or sys_platform == "linux2")' ] +[project.optional-dependencies] +diarization = ["diart"] +sentence = ["mosestokenizer", "wtpsplit"] +whisper = ["whisper"] +whisper-timestamped = ["whisper-timestamped"] +mlx-whisper = ["mlx-whisper"] +openai = ["openai"] + [project.urls] Homepage = "https://github.com/QuentinFuxa/WhisperLiveKit"