mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-05-02 03:46:17 +00:00
- Extend test_backend_offline.py with WER and timestamp accuracy metrics computed via whisperlivekit.metrics against ground truth transcripts. - Add --benchmark flag to auto-detect all installed backends and run each (backend, policy) combination in sequence. - Add --policy flag to override the streaming policy. - Add detect_available_backends() probing faster-whisper, mlx-whisper, voxtral-mlx, voxtral (HF), and openai-whisper. - Add print_cross_backend_comparison() with per-combo averages. - Add run_benchmark.py for comprehensive multi-model benchmarking. - Add BENCHMARK.md with full results on Apple M4: speed, WER, timestamp accuracy, VAC impact, and recommendations. - Add ground truth transcript JSON files for all audio test files.
382 lines
4.9 KiB
JSON
382 lines
4.9 KiB
JSON
[
|
|
{
|
|
"word": "Transcription",
|
|
"start": 0.0,
|
|
"end": 0.6
|
|
},
|
|
{
|
|
"word": "technology",
|
|
"start": 0.6,
|
|
"end": 1.24
|
|
},
|
|
{
|
|
"word": "has",
|
|
"start": 1.24,
|
|
"end": 1.5
|
|
},
|
|
{
|
|
"word": "improved",
|
|
"start": 1.5,
|
|
"end": 1.96
|
|
},
|
|
{
|
|
"word": "so",
|
|
"start": 1.96,
|
|
"end": 2.32
|
|
},
|
|
{
|
|
"word": "much",
|
|
"start": 2.32,
|
|
"end": 2.68
|
|
},
|
|
{
|
|
"word": "in",
|
|
"start": 2.68,
|
|
"end": 2.94
|
|
},
|
|
{
|
|
"word": "the",
|
|
"start": 2.94,
|
|
"end": 3.02
|
|
},
|
|
{
|
|
"word": "past",
|
|
"start": 3.02,
|
|
"end": 3.24
|
|
},
|
|
{
|
|
"word": "few",
|
|
"start": 3.24,
|
|
"end": 3.5
|
|
},
|
|
{
|
|
"word": "years.",
|
|
"start": 3.5,
|
|
"end": 3.96
|
|
},
|
|
{
|
|
"word": "Have",
|
|
"start": 4.56,
|
|
"end": 4.74
|
|
},
|
|
{
|
|
"word": "you",
|
|
"start": 4.74,
|
|
"end": 4.9
|
|
},
|
|
{
|
|
"word": "noticed",
|
|
"start": 4.9,
|
|
"end": 5.26
|
|
},
|
|
{
|
|
"word": "how",
|
|
"start": 5.26,
|
|
"end": 5.52
|
|
},
|
|
{
|
|
"word": "accurate",
|
|
"start": 5.52,
|
|
"end": 6.08
|
|
},
|
|
{
|
|
"word": "real",
|
|
"start": 6.08,
|
|
"end": 6.42
|
|
},
|
|
{
|
|
"word": "-time",
|
|
"start": 6.42,
|
|
"end": 6.74
|
|
},
|
|
{
|
|
"word": "speech",
|
|
"start": 6.74,
|
|
"end": 7.24
|
|
},
|
|
{
|
|
"word": "to",
|
|
"start": 7.24,
|
|
"end": 7.46
|
|
},
|
|
{
|
|
"word": "text",
|
|
"start": 7.46,
|
|
"end": 7.78
|
|
},
|
|
{
|
|
"word": "is",
|
|
"start": 7.78,
|
|
"end": 8.0
|
|
},
|
|
{
|
|
"word": "now?",
|
|
"start": 8.0,
|
|
"end": 8.3
|
|
},
|
|
{
|
|
"word": "Absolutely.",
|
|
"start": 8.7,
|
|
"end": 9.16
|
|
},
|
|
{
|
|
"word": "I",
|
|
"start": 10.04,
|
|
"end": 10.38
|
|
},
|
|
{
|
|
"word": "use",
|
|
"start": 10.38,
|
|
"end": 10.56
|
|
},
|
|
{
|
|
"word": "it",
|
|
"start": 10.56,
|
|
"end": 10.76
|
|
},
|
|
{
|
|
"word": "all",
|
|
"start": 10.76,
|
|
"end": 10.9
|
|
},
|
|
{
|
|
"word": "the",
|
|
"start": 10.9,
|
|
"end": 11.04
|
|
},
|
|
{
|
|
"word": "time",
|
|
"start": 11.04,
|
|
"end": 11.32
|
|
},
|
|
{
|
|
"word": "for",
|
|
"start": 11.32,
|
|
"end": 11.54
|
|
},
|
|
{
|
|
"word": "taking",
|
|
"start": 11.54,
|
|
"end": 11.86
|
|
},
|
|
{
|
|
"word": "notes",
|
|
"start": 11.86,
|
|
"end": 12.16
|
|
},
|
|
{
|
|
"word": "during",
|
|
"start": 12.16,
|
|
"end": 12.54
|
|
},
|
|
{
|
|
"word": "meetings.",
|
|
"start": 12.54,
|
|
"end": 12.94
|
|
},
|
|
{
|
|
"word": "It's",
|
|
"start": 13.6,
|
|
"end": 13.8
|
|
},
|
|
{
|
|
"word": "amazing",
|
|
"start": 13.8,
|
|
"end": 14.1
|
|
},
|
|
{
|
|
"word": "how",
|
|
"start": 14.1,
|
|
"end": 14.48
|
|
},
|
|
{
|
|
"word": "it",
|
|
"start": 14.48,
|
|
"end": 14.62
|
|
},
|
|
{
|
|
"word": "can",
|
|
"start": 14.62,
|
|
"end": 14.74
|
|
},
|
|
{
|
|
"word": "recognise",
|
|
"start": 14.74,
|
|
"end": 15.24
|
|
},
|
|
{
|
|
"word": "different",
|
|
"start": 15.24,
|
|
"end": 15.68
|
|
},
|
|
{
|
|
"word": "speakers",
|
|
"start": 15.68,
|
|
"end": 16.16
|
|
},
|
|
{
|
|
"word": "and",
|
|
"start": 16.16,
|
|
"end": 16.8
|
|
},
|
|
{
|
|
"word": "even",
|
|
"start": 16.8,
|
|
"end": 17.1
|
|
},
|
|
{
|
|
"word": "add",
|
|
"start": 17.1,
|
|
"end": 17.44
|
|
},
|
|
{
|
|
"word": "punctuation.",
|
|
"start": 17.44,
|
|
"end": 18.36
|
|
},
|
|
{
|
|
"word": "Yeah,",
|
|
"start": 18.88,
|
|
"end": 19.16
|
|
},
|
|
{
|
|
"word": "but",
|
|
"start": 19.36,
|
|
"end": 19.52
|
|
},
|
|
{
|
|
"word": "sometimes",
|
|
"start": 19.52,
|
|
"end": 20.16
|
|
},
|
|
{
|
|
"word": "noise",
|
|
"start": 20.16,
|
|
"end": 20.54
|
|
},
|
|
{
|
|
"word": "can",
|
|
"start": 20.54,
|
|
"end": 20.8
|
|
},
|
|
{
|
|
"word": "still",
|
|
"start": 20.8,
|
|
"end": 21.1
|
|
},
|
|
{
|
|
"word": "cause",
|
|
"start": 21.1,
|
|
"end": 21.44
|
|
},
|
|
{
|
|
"word": "mistakes.",
|
|
"start": 21.44,
|
|
"end": 21.94
|
|
},
|
|
{
|
|
"word": "Does",
|
|
"start": 22.68,
|
|
"end": 22.9
|
|
},
|
|
{
|
|
"word": "this",
|
|
"start": 22.9,
|
|
"end": 23.12
|
|
},
|
|
{
|
|
"word": "system",
|
|
"start": 23.12,
|
|
"end": 23.46
|
|
},
|
|
{
|
|
"word": "handle",
|
|
"start": 23.46,
|
|
"end": 23.88
|
|
},
|
|
{
|
|
"word": "that",
|
|
"start": 23.88,
|
|
"end": 24.12
|
|
},
|
|
{
|
|
"word": "well?",
|
|
"start": 24.12,
|
|
"end": 24.42
|
|
},
|
|
{
|
|
"word": "It",
|
|
"start": 24.42,
|
|
"end": 25.32
|
|
},
|
|
{
|
|
"word": "does",
|
|
"start": 25.32,
|
|
"end": 25.48
|
|
},
|
|
{
|
|
"word": "a",
|
|
"start": 25.48,
|
|
"end": 25.62
|
|
},
|
|
{
|
|
"word": "pretty",
|
|
"start": 25.62,
|
|
"end": 25.88
|
|
},
|
|
{
|
|
"word": "good",
|
|
"start": 25.88,
|
|
"end": 26.08
|
|
},
|
|
{
|
|
"word": "job",
|
|
"start": 26.08,
|
|
"end": 26.32
|
|
},
|
|
{
|
|
"word": "filtering",
|
|
"start": 26.32,
|
|
"end": 26.8
|
|
},
|
|
{
|
|
"word": "noise,",
|
|
"start": 26.8,
|
|
"end": 27.18
|
|
},
|
|
{
|
|
"word": "especially",
|
|
"start": 27.36,
|
|
"end": 28.0
|
|
},
|
|
{
|
|
"word": "with",
|
|
"start": 28.0,
|
|
"end": 28.28
|
|
},
|
|
{
|
|
"word": "models",
|
|
"start": 28.28,
|
|
"end": 28.62
|
|
},
|
|
{
|
|
"word": "that",
|
|
"start": 28.62,
|
|
"end": 28.94
|
|
},
|
|
{
|
|
"word": "use",
|
|
"start": 28.94,
|
|
"end": 29.22
|
|
},
|
|
{
|
|
"word": "voice",
|
|
"start": 29.22,
|
|
"end": 29.54
|
|
},
|
|
{
|
|
"word": "active.",
|
|
"start": 29.54,
|
|
"end": 29.9
|
|
}
|
|
] |