mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-07 14:23:18 +00:00
punctuation is now checked in timed_object
This commit is contained in:
@@ -6,11 +6,10 @@ from whisperlivekit.timed_objects import Line, format_time
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
PUNCTUATION_MARKS = {'.', '!', '?', '。', '!', '?'}
|
||||
CHECK_AROUND = 4
|
||||
|
||||
def is_punctuation(token):
|
||||
if token.text.strip() in PUNCTUATION_MARKS:
|
||||
if token.is_punctuation():
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -2,6 +2,8 @@ from dataclasses import dataclass, field
|
||||
from typing import Optional, Any
|
||||
from datetime import timedelta
|
||||
|
||||
PUNCTUATION_MARKS = {'.', '!', '?', '。', '!', '?'}
|
||||
|
||||
def format_time(seconds: float) -> str:
|
||||
"""Format seconds as HH:MM:SS."""
|
||||
return str(timedelta(seconds=int(seconds)))
|
||||
@@ -16,6 +18,9 @@ class TimedText:
|
||||
probability: Optional[float] = None
|
||||
is_dummy: Optional[bool] = False
|
||||
|
||||
def is_punctuation(self):
|
||||
return self.text.strip() in PUNCTUATION_MARKS
|
||||
|
||||
def overlaps_with(self, other: 'TimedText') -> bool:
|
||||
return not (self.end <= other.start or other.end <= self.start)
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
#In diarization case, we may want to translate just one speaker, or at least start the sentences there
|
||||
|
||||
PUNCTUATION_MARKS = {'.', '!', '?', '。', '!', '?'}
|
||||
|
||||
MIN_SILENCE_DURATION_DEL_BUFFER = 3 #After a silence of x seconds, we consider the model should not use the buffer, even if the previous
|
||||
# sentence is not finished.
|
||||
|
||||
@@ -111,7 +109,7 @@ class OnlineTranslation:
|
||||
if len(self.buffer) < self.len_processed_buffer + 3: #nothing new to process
|
||||
return self.validated + [self.translation_remaining]
|
||||
while i < len(self.buffer):
|
||||
if self.buffer[i].text in PUNCTUATION_MARKS:
|
||||
if self.buffer[i].is_punctuation():
|
||||
translation_sentence = self.translate_tokens(self.buffer[:i+1])
|
||||
self.validated.append(translation_sentence)
|
||||
self.buffer = self.buffer[i+1:]
|
||||
|
||||
Reference in New Issue
Block a user