Merge pull request #45 from QuentinFuxa/solving-ffmpeg-process-freezing-unexpectedly

Add get_buffer method to retrieve unvalidated buffer in string format
This commit is contained in:
Quentin Fuxa
2025-02-13 00:38:45 +01:00
committed by GitHub
2 changed files with 17 additions and 7 deletions

View File

@@ -85,6 +85,7 @@ class HypothesisBuffer:
self.committed_in_buffer.pop(0)
class OnlineASRProcessor:
"""
Processes incoming audio in a streaming fashion, calling the ASR system
@@ -163,6 +164,13 @@ class OnlineASRProcessor:
context_text = self.asr.sep.join(token.text for token in non_prompt_tokens)
return self.asr.sep.join(prompt_list[::-1]), context_text
def get_buffer(self):
    """
    Return the unvalidated buffer as a string.

    The tokens held in ``self.transcript_buffer.buffer`` are merged via
    ``self.concatenate_tokens`` and the ``text`` attribute of the merged
    result is returned.
    """
    merge_tokens = self.concatenate_tokens
    merged = merge_tokens(self.transcript_buffer.buffer)
    return merged.text
def process_iter(self) -> Transcript:
"""
Processes the current audio buffer.
@@ -413,4 +421,10 @@ class VACOnlineASRProcessor:
result = self.online.finish()
self.current_online_chunk_buffer_size = 0
self.is_currently_final = False
return result
return result
def get_buffer(self):
    """
    Return the unvalidated buffer of the wrapped processor as a string.

    Delegates to ``self.online.get_buffer()`` instead of re-implementing the
    token concatenation here, so the buffer-to-text logic lives in one place.
    """
    # NOTE(review): assumes self.online is an OnlineASRProcessor (which
    # defines get_buffer in this same file) -- confirm against __init__.
    return self.online.get_buffer()

View File

@@ -158,12 +158,8 @@ async def websocket_endpoint(websocket: WebSocket):
})
full_transcription += transcription.text
if args.vac:
transcript = online.online.concatenate_tokens(online.online.transcript_buffer.buffer)
else:
transcript = online.concatenate_tokens(online.transcript_buffer.buffer)
buffer = transcript.text
buffer = online.get_buffer()
if buffer in full_transcription: # With VAC, the buffer is not updated until the next chunk is processed
buffer = ""