mirror of
https://github.com/QuentinFuxa/WhisperLiveKit.git
synced 2026-03-07 22:33:36 +00:00
60 lines
1.4 KiB
Python
60 lines
1.4 KiB
Python
from dataclasses import dataclass
|
|
from typing import Optional
|
|
from datetime import timedelta
|
|
|
|
def format_time(seconds: float) -> str:
    """Render a number of seconds as the string form of a ``timedelta``.

    The value is truncated to whole seconds with ``int`` first, so the
    result never carries a fractional part. Output follows
    ``str(timedelta)``: ``H:MM:SS`` (hours are not zero-padded), with a
    ``"N day(s), "`` prefix once the span reaches 24 hours.
    """
    whole_seconds = int(seconds)
    elapsed = timedelta(seconds=whole_seconds)
    return str(elapsed)
|
|
|
|
|
|
@dataclass
class TimedText:
    """Base record pairing a piece of text with a start and an end time.

    ``start`` and ``end`` must always be supplied; every other field has a
    default. Units are not stated here (presumably seconds, since
    ``Line.to_dict`` feeds them to ``format_time`` -- confirm with callers).
    """

    # Required span boundaries; either may be None.
    start: Optional[float]
    end: Optional[float]
    # Optional payload fields with their defaults.
    text: Optional[str] = ""
    speaker: Optional[int] = -1
    probability: Optional[float] = None
    is_dummy: Optional[bool] = False
|
|
|
|
@dataclass
class ASRToken(TimedText):
    """``TimedText`` record that can produce a time-shifted copy of itself."""

    def with_offset(self, offset: float) -> "ASRToken":
        """Return a new token with the time offset added.

        Args:
            offset: Amount added to both ``start`` and ``end``.

        Returns:
            A fresh ``ASRToken``; ``self`` is left unmodified. All other
            fields (``text``, ``speaker``, ``probability``, ``is_dummy``)
            are copied as-is.

        Raises:
            TypeError: If ``start`` or ``end`` is ``None``, since ``None``
                cannot be added to a number.
        """
        return ASRToken(
            start=self.start + offset,
            end=self.end + offset,
            text=self.text,
            speaker=self.speaker,
            probability=self.probability,
            # Carry the flag over explicitly; omitting it would reset the
            # copy to the field default (False).
            is_dummy=self.is_dummy,
        )
|
|
|
|
@dataclass
class Sentence(TimedText):
    """``TimedText`` subtype that adds no fields or methods of its own."""
|
|
|
|
@dataclass
class Transcript(TimedText):
    """``TimedText`` subtype that adds no fields or methods of its own."""
|
|
|
|
@dataclass
class SpeakerSegment(TimedText):
    """A stretch of audio attributed to one specific speaker.

    Neither text nor probability is associated with such a segment; the
    class itself declares nothing beyond what ``TimedText`` provides.
    """
|
|
|
|
@dataclass
class Translation(TimedText):
    """``TimedText`` subtype that adds no fields or methods of its own."""
|
|
|
|
@dataclass
class Silence:
    """Standalone record carrying a single ``duration`` value.

    Unlike the other records in this file it does not derive from
    ``TimedText``. Presumably the length of a silent stretch; units are
    not stated here -- confirm with callers.
    """

    duration: float
|
|
|
|
|
|
@dataclass
class Line(TimedText):
    """``TimedText`` extended with a ``translation`` string and a dict export."""

    translation: str = ""

    def to_dict(self):
        """Return this line as a plain dictionary.

        ``speaker`` is coerced with ``int``; ``start`` and ``end`` are
        converted to strings by ``format_time``; ``text`` and
        ``translation`` are passed through unchanged.
        """
        # Evaluate in the same order as the resulting keys.
        speaker_id = int(self.speaker)
        start_label = format_time(self.start)
        end_label = format_time(self.end)
        return {
            'speaker': speaker_id,
            'text': self.text,
            'translation': self.translation,
            'start': start_label,
            'end': end_label,
        }