mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-07 14:23:18 +00:00
speaker -1 are no more sent in websocket - no buffer when their is a silence
This commit is contained in:
@@ -301,7 +301,9 @@ class AudioProcessor:
|
||||
transcription_lag_s = max(0.0, time() - self.beg_loop - self.end_buffer)
|
||||
asr_processing_logs = f"internal_buffer={asr_internal_buffer_duration_s:.2f}s | lag={transcription_lag_s:.2f}s |"
|
||||
if type(item) is Silence:
|
||||
asr_processing_logs += f" + Silence of = {item.duration:.2f}s | last_end = {self.tokens[-1].end} |"
|
||||
asr_processing_logs += f" + Silence of = {item.duration:.2f}s"
|
||||
if self.tokens:
|
||||
asr_processing_logs += " | last_end = {self.tokens[-1].end} |"
|
||||
logger.info(asr_processing_logs)
|
||||
|
||||
if type(item) is Silence:
|
||||
@@ -445,12 +447,15 @@ class AudioProcessor:
|
||||
last_end_diarized = 0
|
||||
undiarized_text = []
|
||||
current_time = time() - self.beg_loop if self.beg_loop else None
|
||||
tokens, buffer_transcription = handle_silences(tokens, buffer_transcription, current_time, self.silence)
|
||||
tokens, buffer_transcription, buffer_diarization = handle_silences(tokens, buffer_transcription, buffer_diarization, current_time, self.silence)
|
||||
for token in tokens:
|
||||
speaker = token.speaker
|
||||
|
||||
if speaker == -1: #Speaker -1 means no attributed by diarization. In the frontend, it should appear under 'Speaker 1'
|
||||
speaker = 1
|
||||
|
||||
# Handle diarization
|
||||
if self.args.diarization:
|
||||
if self.args.diarization and not tokens[-1].speaker == -2:
|
||||
if (speaker in [-1, 0]) and token.end >= end_attributed_speaker:
|
||||
undiarized_text.append(token.text)
|
||||
continue
|
||||
|
||||
@@ -77,9 +77,9 @@ def no_token_to_silence(tokens):
|
||||
new_tokens.append(token)
|
||||
return new_tokens
|
||||
|
||||
def ends_with_silence(tokens, buffer_transcription, current_time, vac_detected_silence):
|
||||
def ends_with_silence(tokens, buffer_transcription, buffer_diarization, current_time, vac_detected_silence):
|
||||
if not tokens:
|
||||
return []
|
||||
return [], buffer_transcription, buffer_diarization
|
||||
last_token = tokens[-1]
|
||||
if tokens and (
|
||||
current_time - last_token.end >= END_SILENCE_DURATION
|
||||
@@ -97,13 +97,14 @@ def ends_with_silence(tokens, buffer_transcription, current_time, vac_detected_s
|
||||
probability=0.95
|
||||
)
|
||||
)
|
||||
#We validate the buffer has because of the silence
|
||||
return tokens
|
||||
buffer_transcription = "" # for whisperstreaming backend, we should probably validate the buffer has because of the silence
|
||||
buffer_diarization = ""
|
||||
return tokens, buffer_transcription, buffer_diarization
|
||||
|
||||
|
||||
def handle_silences(tokens, buffer_transcription, current_time, vac_detected_silence):
|
||||
def handle_silences(tokens, buffer_transcription, buffer_diarization, current_time, vac_detected_silence):
|
||||
tokens = blank_to_silence(tokens) #useful for simulstreaming backend which tends to generate [BLANK_AUDIO] text
|
||||
tokens = no_token_to_silence(tokens)
|
||||
tokens = ends_with_silence(tokens, buffer_transcription, current_time, vac_detected_silence)
|
||||
return tokens, buffer_transcription
|
||||
tokens, buffer_transcription, buffer_diarization = ends_with_silence(tokens, buffer_transcription, buffer_diarization, current_time, vac_detected_silence)
|
||||
return tokens, buffer_transcription, buffer_diarization
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
let isRecording = false;
|
||||
let websocket = null;
|
||||
let recorder = null;
|
||||
let chunkDuration = 1000;
|
||||
let chunkDuration = 100;
|
||||
let websocketUrl = "ws://localhost:8000/asr";
|
||||
let userClosing = false;
|
||||
let wakeLock = null;
|
||||
@@ -269,9 +269,7 @@ function renderLinesWithBuffer(
|
||||
speakerLabel = `<span class='loading'><span class="spinner"></span><span id='timeInfo'><span class="loading-diarization-value">${fmt1(
|
||||
remaining_time_diarization
|
||||
)}</span> second(s) of audio are undergoing diarization</span></span>`;
|
||||
} else if (item.speaker == -1) {
|
||||
speakerLabel = `<span id="speaker">Speaker 1<span id='timeInfo'>${timeInfo}</span></span>`;
|
||||
} else if (item.speaker !== -1 && item.speaker !== 0) {
|
||||
} else if (item.speaker !== 0) {
|
||||
speakerLabel = `<span id="speaker">Speaker ${item.speaker}<span id='timeInfo'>${timeInfo}</span></span>`;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user