diff --git a/README.md b/README.md index 97bd9d8..9b6e2bb 100644 --- a/README.md +++ b/README.md @@ -15,14 +15,14 @@ ## 🚀 Overview -This project is based on [Whisper Streaming](https://github.com/ufal/whisper_streaming) and lets you transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with an example frontend that you can customize for your own needs. Everything runs locally on your machine ✨ +This project is based on [Whisper Streaming](https://github.com/ufal/whisper_streaming) and lets you transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with a functional and simple frontend that you can customize for your own needs. Everything runs locally on your machine ✨ ### 🔄 Architecture WhisperLiveKit consists of two main components: - **Backend (Server)**: FastAPI WebSocket server that processes audio and provides real-time transcription -- **Frontend Example**: Basic HTML & JavaScript implementation that demonstrates how to capture and stream audio +- **Frontend Example**: Basic HTML & JavaScript implementation to capture and stream audio > **Note**: We recommend installing this library on the server/backend. For the frontend, you can use and adapt the provided HTML template from [whisperlivekit/web/live_transcription.html](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/whisperlivekit/web/live_transcription.html) for your specific use case. 
@@ -33,13 +33,13 @@ WhisperLiveKit consists of two main components: - **🔒 Fully Local** - All processing happens on your machine - no data sent to external servers - **📱 Multi-User Support** - Handle multiple users simultaneously with a single backend/server -### ⚙️ Differences from [Whisper Streaming](https://github.com/ufal/whisper_streaming) +### ⚙️ Core Differences from [Whisper Streaming](https://github.com/ufal/whisper_streaming) +- **Automatic Silence Chunking** – Automatically chunks when no audio is detected to limit buffer size - **Multi-User Support** – Handles multiple users simultaneously by decoupling backend and online ASR +- **Confidence Validation** – Immediately validate high-confidence tokens for faster inference - **MLX Whisper Backend** – Optimized for Apple Silicon for faster local processing - **Buffering Preview** – Displays unvalidated transcription segments -- **Confidence Validation** – Immediately validate high-confidence tokens for faster inference -- **Apple Silicon Optimized** - MLX backend for faster local processing on Mac ## 📖 Quick Start diff --git a/demo.png b/demo.png index 9e2e519..8b80b6b 100644 Binary files a/demo.png and b/demo.png differ diff --git a/whisper_fastapi_online_server.py b/whisper_fastapi_online_server.py deleted file mode 100644 index d3578f5..0000000 --- a/whisper_fastapi_online_server.py +++ /dev/null @@ -1,83 +0,0 @@ -from contextlib import asynccontextmanager -from fastapi import FastAPI, WebSocket, WebSocketDisconnect -from fastapi.responses import HTMLResponse -from fastapi.middleware.cors import CORSMiddleware - -from whisperlivekit import WhisperLiveKit, parse_args -from whisperlivekit.audio_processor import AudioProcessor - -import asyncio -import logging -import os - -logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") -logging.getLogger().setLevel(logging.WARNING) -logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) - -kit = None -
-@asynccontextmanager -async def lifespan(app: FastAPI): - global kit - kit = WhisperLiveKit() - yield - -app = FastAPI(lifespan=lifespan) -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - - -@app.get("/") -async def get(): - return HTMLResponse(kit.web_interface()) - - -async def handle_websocket_results(websocket, results_generator): - """Consumes results from the audio processor and sends them via WebSocket.""" - try: - async for response in results_generator: - await websocket.send_json(response) - except Exception as e: - logger.warning(f"Error in WebSocket results handler: {e}") - - -@app.websocket("/asr") -async def websocket_endpoint(websocket: WebSocket): - audio_processor = AudioProcessor() - - await websocket.accept() - logger.info("WebSocket connection opened.") - - results_generator = await audio_processor.create_tasks() - websocket_task = asyncio.create_task(handle_websocket_results(websocket, results_generator)) - - try: - while True: - message = await websocket.receive_bytes() - await audio_processor.process_audio(message) - except WebSocketDisconnect: - logger.warning("WebSocket disconnected.") - finally: - websocket_task.cancel() - await audio_processor.cleanup() - logger.info("WebSocket endpoint cleaned up.") - -if __name__ == "__main__": - import uvicorn - - args = parse_args() - - uvicorn.run( - "whisper_fastapi_online_server:app", - host=args.host, - port=args.port, - reload=False, - log_level="info", - lifespan="on", - ) \ No newline at end of file