mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-07 22:33:36 +00:00
remove whisper_fastapi_online_server.py
This commit is contained in:
10
README.md
10
README.md
@@ -15,14 +15,14 @@
|
||||
|
||||
## 🚀 Overview
|
||||
|
||||
This project is based on [Whisper Streaming](https://github.com/ufal/whisper_streaming) and lets you transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with an example frontend that you can customize for your own needs. Everything runs locally on your machine ✨
|
||||
This project is based on [Whisper Streaming](https://github.com/ufal/whisper_streaming) and lets you transcribe audio directly from your browser. WhisperLiveKit provides a complete backend solution for real-time speech transcription with a functional and simple frontend that you can customize for your own needs. Everything runs locally on your machine ✨
|
||||
|
||||
### 🔄 Architecture
|
||||
|
||||
WhisperLiveKit consists of two main components:
|
||||
|
||||
- **Backend (Server)**: FastAPI WebSocket server that processes audio and provides real-time transcription
|
||||
- **Frontend Example**: Basic HTML & JavaScript implementation that demonstrates how to capture and stream audio
|
||||
- **Frontend Example**: Basic HTML & JavaScript implementation to capture and stream audio
|
||||
|
||||
> **Note**: We recommend installing this library on the server/backend. For the frontend, you can use and adapt the provided HTML template from [whisperlivekit/web/live_transcription.html](https://github.com/QuentinFuxa/WhisperLiveKit/blob/main/whisperlivekit/web/live_transcription.html) for your specific use case.
|
||||
|
||||
@@ -33,13 +33,13 @@ WhisperLiveKit consists of two main components:
|
||||
- **🔒 Fully Local** - All processing happens on your machine - no data sent to external servers
|
||||
- **📱 Multi-User Support** - Handle multiple users simultaneously with a single backend/server
|
||||
|
||||
### ⚙️ Differences from [Whisper Streaming](https://github.com/ufal/whisper_streaming)
|
||||
### ⚙️ Core Differences from [Whisper Streaming](https://github.com/ufal/whisper_streaming)
|
||||
|
||||
- **Automatic Silence Chunking** – Automatically chunks when no audio is detected to limit buffer size
|
||||
- **Multi-User Support** – Handles multiple users simultaneously by decoupling backend and online ASR
|
||||
- **Confidence Validation** – Immediately validate high-confidence tokens for faster inference
|
||||
- **MLX Whisper Backend** – Optimized for Apple Silicon for faster local processing
|
||||
- **Buffering Preview** – Displays unvalidated transcription segments
|
||||
- **Confidence Validation** – Immediately validate high-confidence tokens for faster inference
|
||||
- **Apple Silicon Optimized** - MLX backend for faster local processing on Mac
|
||||
|
||||
## 📖 Quick Start
|
||||
|
||||
|
||||
BIN
demo.png
BIN
demo.png
Binary file not shown.
|
Before Width: | Height: | Size: 424 KiB After Width: | Height: | Size: 438 KiB |
@@ -1,83 +0,0 @@
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
||||
from fastapi.responses import HTMLResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from whisperlivekit import WhisperLiveKit, parse_args
|
||||
from whisperlivekit.audio_processor import AudioProcessor
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
|
||||
# Process-wide logging configuration. Note the deliberate layering:
# basicConfig installs a handler with a timestamped format, then the root
# logger is raised to WARNING to quiet third-party libraries, while this
# module's own logger is dropped to DEBUG so server activity stays visible.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logging.getLogger().setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Shared transcription engine. Created in the FastAPI lifespan hook (not at
# import time) so model loading happens exactly once, at server startup.
kit = None
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: build the shared WhisperLiveKit engine at startup.

    Runs once per server process. There is no teardown after ``yield`` —
    the engine lives for the lifetime of the process.
    """
    global kit
    # WhisperLiveKit() presumably loads models / parses CLI config here —
    # doing it in the lifespan keeps import of this module side-effect free.
    kit = WhisperLiveKit()
    yield
|
||||
|
||||
# Application instance; heavy initialization is deferred to the lifespan hook.
app = FastAPI(lifespan=lifespan)
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# effectively wide-open CORS. Fine for a local demo, but the origin list
# should be restricted before exposing this server on a network.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def get():
|
||||
return HTMLResponse(kit.web_interface())
|
||||
|
||||
|
||||
async def handle_websocket_results(websocket, results_generator):
    """Forward each transcription result from the processor to the client.

    Runs until *results_generator* is exhausted. Any failure while consuming
    or sending is logged as a warning and ends the loop without propagating.
    """
    try:
        async for payload in results_generator:
            await websocket.send_json(payload)
    except Exception as e:
        logger.warning(f"Error in WebSocket results handler: {e}")
|
||||
|
||||
|
||||
@app.websocket("/asr")
|
||||
async def websocket_endpoint(websocket: WebSocket):
|
||||
audio_processor = AudioProcessor()
|
||||
|
||||
await websocket.accept()
|
||||
logger.info("WebSocket connection opened.")
|
||||
|
||||
results_generator = await audio_processor.create_tasks()
|
||||
websocket_task = asyncio.create_task(handle_websocket_results(websocket, results_generator))
|
||||
|
||||
try:
|
||||
while True:
|
||||
message = await websocket.receive_bytes()
|
||||
await audio_processor.process_audio(message)
|
||||
except WebSocketDisconnect:
|
||||
logger.warning("WebSocket disconnected.")
|
||||
finally:
|
||||
websocket_task.cancel()
|
||||
await audio_processor.cleanup()
|
||||
logger.info("WebSocket endpoint cleaned up.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
args = parse_args()
|
||||
|
||||
uvicorn.run(
|
||||
"whisper_fastapi_online_server:app",
|
||||
host=args.host,
|
||||
port=args.port,
|
||||
reload=False,
|
||||
log_level="info",
|
||||
lifespan="on",
|
||||
)
|
||||
Reference in New Issue
Block a user