From 80eb0baf5d242e952b83f79550c2388f7e0515f4 Mon Sep 17 00:00:00 2001 From: Aleksei Scripnic Date: Wed, 3 Jan 2024 10:06:05 +0000 Subject: [PATCH] Removed duplicate variable self.last_chunked_at I tried to find the difference between self.last_chunked_at and self.buffer_time_offset, and it took me a while to understand that they are exactly the same. I think it's better to get rid of one of the duplicates to make the code more readable. --- whisper_online.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/whisper_online.py b/whisper_online.py index 7672cc8..fd66319 100644 --- a/whisper_online.py +++ b/whisper_online.py @@ -328,7 +328,6 @@ class OnlineASRProcessor: self.transcript_buffer = HypothesisBuffer(logfile=self.logfile) self.commited = [] - self.last_chunked_at = 0 self.silence_iters = 0 @@ -340,7 +339,7 @@ class OnlineASRProcessor: "context" is the commited text that is inside the audio buffer. It is transcribed again and skipped. It is returned only for debugging and logging reasons. """ k = max(0,len(self.commited)-1) - while k > 0 and self.commited[k-1][1] > self.last_chunked_at: + while k > 0 and self.commited[k-1][1] > self.buffer_time_offset: k -= 1 p = self.commited[:k] @@ -451,7 +450,6 @@ class OnlineASRProcessor: cut_seconds = time - self.buffer_time_offset self.audio_buffer = self.audio_buffer[int(cut_seconds*self.SAMPLING_RATE):] self.buffer_time_offset = time - self.last_chunked_at = time def words_to_sentences(self, words): """Uses self.tokenizer for sentence segmentation of words.