mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-07 06:14:05 +00:00
- Extend test_backend_offline.py with WER and timestamp accuracy metrics computed via whisperlivekit.metrics against ground truth transcripts. - Add --benchmark flag to auto-detect all installed backends and run each (backend, policy) combination in sequence. - Add --policy flag to override the streaming policy. - Add detect_available_backends() probing faster-whisper, mlx-whisper, voxtral-mlx, voxtral (HF), and openai-whisper. - Add print_cross_backend_comparison() with per-combo averages. - Add run_benchmark.py for comprehensive multi-model benchmarking. - Add BENCHMARK.md with full results on Apple M4: speed, WER, timestamp accuracy, VAC impact, and recommendations. - Add ground truth transcript JSON files for all audio test files.
58 lines
1.7 KiB
Python
58 lines
1.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Generate word-level timestamped transcripts using faster-whisper (offline).

Produces one JSON file per audio file with: [{word, start, end}, ...]
"""
|
|
|
|
import json
|
|
import os
|
|
from faster_whisper import WhisperModel
|
|
|
|
# Absolute path of the directory holding this script. main() joins every
# audio filename below onto it and writes the transcripts into it as well.
AUDIO_DIR = os.path.dirname(os.path.abspath(__file__))

# (audio filename, language code) pairs to transcribe. The language code is
# forwarded to the model's transcribe() call as its ``language`` argument.
FILES = [
    ("00_00_07_english_1_speaker.wav", "en"),
    ("00_00_16_french_1_speaker.wav", "fr"),
    ("00_00_30_english_3_speakers.wav", "en"),
]
|
|
|
|
def main():
    """Transcribe every entry of FILES and save its word timings as JSON.

    One faster-whisper ``base`` model (CPU, float32) is loaded up front and
    reused for all files. For each ``(filename, language)`` pair in FILES the
    audio is read from AUDIO_DIR, every recognised word is echoed to stdout
    with its start and end time, and the collected
    ``{"word", "start", "end"}`` records (times rounded to 3 decimals) are
    written to ``<name without extension>.transcript.json`` in AUDIO_DIR.
    """
    print("Loading faster-whisper model (base, cpu, float32)...")
    model = WhisperModel("base", device="cpu", compute_type="float32")

    for audio_name, language in FILES:
        source = os.path.join(AUDIO_DIR, audio_name)
        # Drop only the last extension, then append the transcript suffix.
        stem = audio_name.rsplit(".", 1)[0]
        target = os.path.join(AUDIO_DIR, stem + ".transcript.json")

        print(f"\n{'='*60}")
        print(f"Transcribing: {audio_name} (language={language})")
        print(f"{'='*60}")

        # The second element of the result is not used by this script.
        segments, _info = model.transcribe(
            source, word_timestamps=True, language=language
        )

        entries = []
        for seg in segments:
            # A segment whose ``words`` is falsy (None or empty) adds nothing.
            for token in seg.words or ():
                text = token.word.strip()
                entries.append(
                    {
                        "word": text,
                        "start": round(token.start, 3),
                        "end": round(token.end, 3),
                    }
                )
                print(f" {token.start:6.2f} - {token.end:6.2f} {text}")

        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        with open(target, "w", encoding="utf-8") as handle:
            json.dump(entries, handle, indent=2, ensure_ascii=False)

        print(f"\n -> {len(entries)} words written to {os.path.basename(target)}")

    print("\nDone.")
|
|
|
|
|
|
# Run the transcription only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|