mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-07 22:33:36 +00:00
13
LICENSE
13
LICENSE
@@ -1,10 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Quentin Fuxa.
|
||||
Based on:
|
||||
- The original work by ÚFAL. License: https://github.com/ufal/whisper_streaming/blob/main/LICENSE
|
||||
- The work by Snakers4 (silero-vad). License: https://github.com/snakers4/silero-vad/blob/f6b1294cb27590fb2452899df98fb234dfef1134/LICENSE
|
||||
- The work in Diart by juanmc2005. License: https://github.com/juanmc2005/diart/blob/main/LICENSE
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -26,8 +22,7 @@ SOFTWARE.
|
||||
|
||||
---
|
||||
|
||||
Third-party components included in this software:
|
||||
|
||||
- **whisper_streaming** by ÚFAL – MIT License – https://github.com/ufal/whisper_streaming
|
||||
- **silero-vad** by Snakers4 – MIT License – https://github.com/snakers4/silero-vad
|
||||
- **Diart** by juanmc2005 – MIT License – https://github.com/juanmc2005/diart
|
||||
Based on:
|
||||
- **whisper_streaming** by ÚFAL – MIT License – https://github.com/ufal/whisper_streaming. The original work by ÚFAL. License: https://github.com/ufal/whisper_streaming/blob/main/LICENSE
|
||||
- **silero-vad** by Snakers4 – MIT License – https://github.com/snakers4/silero-vad. The work by Snakers4 (silero-vad). License: https://github.com/snakers4/silero-vad/blob/f6b1294cb27590fb2452899df98fb234dfef1134/LICENSE
|
||||
- **Diart** by juanmc2005 – MIT License – https://github.com/juanmc2005/diart. The work in Diart by juanmc2005. License: https://github.com/juanmc2005/diart/blob/main/LICENSE
|
||||
@@ -9,8 +9,8 @@
|
||||
<p align="center">
|
||||
<a href="https://pypi.org/project/whisperlivekit/"><img alt="PyPI Version" src="https://img.shields.io/pypi/v/whisperlivekit?color=g"></a>
|
||||
<a href="https://pepy.tech/project/whisperlivekit"><img alt="PyPI Downloads" src="https://static.pepy.tech/personalized-badge/whisperlivekit?period=total&units=international_system&left_color=grey&right_color=brightgreen&left_text=downloads"></a>
|
||||
<a href="https://pypi.org/project/whisperlivekit/"><img alt="Python Versions" src="https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-dark_green"></a>
|
||||
<a href="https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/github/license/QuentinFuxa/WhisperLiveKit?color=blue"></a>
|
||||
<a href="https://pypi.org/project/whisperlivekit/"><img alt="Python Versions" src="https://img.shields.io/badge/python-3.9--3.13-dark_green"></a>
|
||||
<a href="https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/badge/License-MIT-dark_green"></a>
|
||||
</p>
|
||||
|
||||
## 🚀 Overview
|
||||
|
||||
@@ -83,10 +83,33 @@ class AudioProcessor:
|
||||
|
||||
def start_ffmpeg_decoder(self):
|
||||
"""Start FFmpeg process for WebM to PCM conversion."""
|
||||
return (ffmpeg.input("pipe:0", format="webm")
|
||||
.output("pipe:1", format="s16le", acodec="pcm_s16le",
|
||||
ac=self.channels, ar=str(self.sample_rate))
|
||||
.run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True))
|
||||
try:
|
||||
return (ffmpeg.input("pipe:0", format="webm")
|
||||
.output("pipe:1", format="s16le", acodec="pcm_s16le",
|
||||
ac=self.channels, ar=str(self.sample_rate))
|
||||
.run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True))
|
||||
except FileNotFoundError:
|
||||
error = """
|
||||
FFmpeg is not installed or not found in your system's PATH.
|
||||
Please install FFmpeg to enable audio processing.
|
||||
|
||||
Installation instructions:
|
||||
|
||||
# Ubuntu/Debian:
|
||||
sudo apt update && sudo apt install ffmpeg
|
||||
|
||||
# macOS (using Homebrew):
|
||||
brew install ffmpeg
|
||||
|
||||
# Windows:
|
||||
# 1. Download the latest static build from https://ffmpeg.org/download.html
|
||||
# 2. Extract the archive (e.g., to C:\\FFmpeg).
|
||||
# 3. Add the 'bin' directory (e.g., C:\\FFmpeg\\bin) to your system's PATH environment variable.
|
||||
|
||||
After installation, please restart the application.
|
||||
"""
|
||||
logger.error(error)
|
||||
raise FileNotFoundError(error)
|
||||
|
||||
async def restart_ffmpeg(self):
|
||||
"""Restart the FFmpeg process after failure."""
|
||||
|
||||
@@ -343,15 +343,15 @@ class OnlineASRProcessor:
|
||||
)
|
||||
sentences.append(sentence)
|
||||
return sentences
|
||||
def finish(self) -> Transcript:
|
||||
|
||||
def finish(self) -> List[ASRToken]:
|
||||
"""
|
||||
Flush the remaining transcript when processing ends.
|
||||
"""
|
||||
remaining_tokens = self.transcript_buffer.buffer
|
||||
final_transcript = self.concatenate_tokens(remaining_tokens)
|
||||
logger.debug(f"Final non-committed transcript: {final_transcript}")
|
||||
logger.debug(f"Final non-committed tokens: {remaining_tokens}")
|
||||
self.buffer_time_offset += len(self.audio_buffer) / self.SAMPLING_RATE
|
||||
return final_transcript
|
||||
return remaining_tokens
|
||||
|
||||
def concatenate_tokens(
|
||||
self,
|
||||
@@ -384,7 +384,8 @@ class VACOnlineASRProcessor:
|
||||
def __init__(self, online_chunk_size: float, *args, **kwargs):
|
||||
self.online_chunk_size = online_chunk_size
|
||||
self.online = OnlineASRProcessor(*args, **kwargs)
|
||||
|
||||
self.asr = self.online.asr
|
||||
|
||||
# Load a VAD model (e.g. Silero VAD)
|
||||
import torch
|
||||
model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
|
||||
@@ -455,7 +456,7 @@ class VACOnlineASRProcessor:
|
||||
self.buffer_offset += max(0, len(self.audio_buffer) - self.SAMPLING_RATE)
|
||||
self.audio_buffer = self.audio_buffer[-self.SAMPLING_RATE:]
|
||||
|
||||
def process_iter(self) -> Transcript:
|
||||
def process_iter(self) -> List[ASRToken]:
|
||||
"""
|
||||
Depending on the VAD status and the amount of accumulated audio,
|
||||
process the current audio chunk.
|
||||
@@ -467,9 +468,9 @@ class VACOnlineASRProcessor:
|
||||
return self.online.process_iter()
|
||||
else:
|
||||
logger.debug("No online update, only VAD")
|
||||
return Transcript(None, None, "")
|
||||
return []
|
||||
|
||||
def finish(self) -> Transcript:
|
||||
def finish(self) -> List[ASRToken]:
|
||||
"""Finish processing by flushing any remaining text."""
|
||||
result = self.online.finish()
|
||||
self.current_online_chunk_buffer_size = 0
|
||||
@@ -480,4 +481,4 @@ class VACOnlineASRProcessor:
|
||||
"""
|
||||
Get the unvalidated buffer in string format.
|
||||
"""
|
||||
return self.online.concatenate_tokens(self.online.transcript_buffer.buffer).text
|
||||
return self.online.concatenate_tokens(self.online.transcript_buffer.buffer)
|
||||
|
||||
Reference in New Issue
Block a user