From b1f703457737e6cbb7ceb504a26619894d726571 Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Tue, 28 Jan 2025 23:13:15 +0100 Subject: [PATCH 1/2] my version of timestamped text --- src/whisper/timestaped_words.py | 95 +++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 src/whisper/timestaped_words.py diff --git a/src/whisper/timestaped_words.py b/src/whisper/timestaped_words.py new file mode 100644 index 0000000..66f65e9 --- /dev/null +++ b/src/whisper/timestaped_words.py @@ -0,0 +1,95 @@ + +import numpy as np +from collections import namedtuple + +class TimeStampedSegment(): + def __init__(self, start=None, end=None, text=""): + self.start = start + self.end = end + self.text = text + + def __getitem__(self, key): + if key == 0: + return self.start + elif key == 1: + return self.end + elif key == 2: + return self.text + elif isinstance(key, slice): + raise NotImplementedError('Slicing not supported') + + def __str__(self): + return f'{self.start} - {self.end}: {self.text}' + + def __repr__(self): + return self.__str__() + + def shift(self, shift): + return TimeStampedSegment(self.start + shift, self.end + shift, self.text) + + def append_text(self, text): + self.text += text + + def __eq__(self, other): + return self.start == other.start and self.end == other.end and self.text == other.text + + def __add__(self, other): + if isinstance(other, (int, float)): + return self.shift(other) + elif isinstance(other, str): + return TimeStampedSegment(self.start, self.end, self.text + other) + else: + raise TypeError(f"unsupported operand type(s) for +: '{type(self).__name__}' and '{type(other).__name__}'") + + + + +class TimeStampedText(list): + + def __init__(self, time_stamped_segments: list[TimeStampedSegment]): + super().__init__(time_stamped_segments) + self._index = 0 + + def words(self): + return [segment.text for segment in self] + def starts(self): + return [segment.start for segment in self] + + def ends(self): + return [segment.end for segment in self] + + + def concatenate(self, sep:str, offset=0)->TimeStampedSegment: + """ + Concatenates the timestamped words or sentences into a single sequence with timing information. + This method joins all words in the sequence using the specified separator and preserves + the timing information from the first to the last word. + Args: + sep (str): Separator string used to join the words together + offset (float, optional): Time offset to add to begin/end timestamps. Defaults to 0. + Returns: + TimeStampedSegment: A new segment containing: + - Start time: First word's start time + offset + - End time: Last word's end time + offset + - Text: All words joined by separator + Examples: + >>> seg = TimeStampedSegment([(1.0, 2.0, "hello"), (2.1, 3.0, "world!")]) + >>> result = seg.concatenate(" ") + >>> print(result) + (1.0, 3.0, "hello world!") + Notes: + Returns an empty TimeStampedSegment if the current segment contains no words. + """ + + + if len(self) == 0: + return TimeStampedSegment() + + combined_text = sep.join(self.words()) + + b = offset + self[0][0] + e = offset + self[-1][1] + return TimeStampedSegment(b, e, combined_text) + + + From 15205f31d1c7ed9c16c6a5d70f047fb4c03865c4 Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Tue, 28 Jan 2025 23:17:21 +0100 Subject: [PATCH 2/2] add doctest --- src/whisper/timestaped_words.py | 129 ++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 58 deletions(-) diff --git a/src/whisper/timestaped_words.py b/src/whisper/timestaped_words.py index 66f65e9..f4aea66 100644 --- a/src/whisper/timestaped_words.py +++ b/src/whisper/timestaped_words.py @@ -1,33 +1,55 @@ +from typing import List -import numpy as np -from collections import namedtuple +class TimeStampedSegment: + """ + Represents a segment of text with start and end timestamps. -class TimeStampedSegment(): - def __init__(self, start=None, end=None, text=""): + Attributes: + start (float): The start time of the segment. + end (float): The end time of the segment. + text (str): The text of the segment. + """ + def __init__(self, start: float, end: float, text: str): self.start = start self.end = end self.text = text - def __getitem__(self, key): - if key == 0: - return self.start - elif key == 1: - return self.end - elif key == 2: - return self.text - elif isinstance(key, slice): - raise NotImplementedError('Slicing not supported') - def __str__(self): return f'{self.start} - {self.end}: {self.text}' def __repr__(self): return self.__str__() - def shift(self, shift): + def shift(self, shift: float): + """ + Shifts the segment by a given amount of time. + + Args: + shift (float): The amount of time to shift the segment. + + Returns: + TimeStampedSegment: A new segment shifted by the given amount of time. + + Example: + >>> segment = TimeStampedSegment(0.0, 1.0, "Hello") + >>> segment.shift(1.0) + 1.0 - 2.0: Hello + """ return TimeStampedSegment(self.start + shift, self.end + shift, self.text) - def append_text(self, text): + def append_text(self, text: str): + """ + Appends text to the segment. + + Args: + text (str): The text to append. + + Example: + >>> segment = TimeStampedSegment(0.0, 1.0, "Hello") + >>> segment.append_text("!") + >>> segment + 0.0 - 1.0: Hello! + """ self.text += text def __eq__(self, other): @@ -40,56 +62,47 @@ class TimeStampedSegment(): return TimeStampedSegment(self.start, self.end, self.text + other) else: raise TypeError(f"unsupported operand type(s) for +: '{type(self).__name__}' and '{type(other).__name__}'") - - - -class TimeStampedText(list): +class TimeStampedText: + """ + Represents a collection of TimeStampedSegment instances. - def __init__(self, time_stamped_segments: list[TimeStampedSegment]): - super().__init__(time_stamped_segments) - self._index = 0 + Attributes: + segments (List[TimeStampedSegment]): The list of segments. + """ + def __init__(self): + self.segments: List[TimeStampedSegment] = [] - def words(self): - return [segment.text for segment in self] - def starts(self): - return [segment.start for segment in self] - - def ends(self): - return [segment.end for segment in self] - - - def concatenate(self, sep:str, offset=0)->TimeStampedSegment: + def add_segment(self, segment: TimeStampedSegment): """ - Concatenates the timestamped words or sentences into a single sequence with timing information. - This method joins all words in the sequence using the specified separator and preserves - the timing information from the first to the last word. + Adds a segment to the collection. + Args: - sep (str): Separator string used to join the words together - offset (float, optional): Time offset to add to begin/end timestamps. Defaults to 0. - Returns: - TimeStampedSegment: A new segment containing: - - Start time: First word's start time + offset - - End time: Last word's end time + offset - - Text: All words joined by separator - Examples: - >>> seg = TimeStampedSegment([(1.0, 2.0, "hello"), (2.1, 3.0, "world!")]) - >>> result = seg.concatenate(" ") - >>> print(result) - (1.0, 3.0, "hello world!") - Notes: - Returns an empty TimeStampedSegment if the current segment contains no words. + segment (TimeStampedSegment): The segment to add. + + Example: + >>> tst = TimeStampedText() + >>> tst.add_segment(TimeStampedSegment(0.0, 1.0, "Hello")) + >>> tst.add_segment(TimeStampedSegment(1.0, 2.0, "world")) + >>> len(tst) + 2 """ - + self.segments.append(segment) - if len(self) == 0: - return TimeStampedSegment() + def __repr__(self): + return f"TimeStampedText(segments={self.segments})" - combined_text = sep.join(self.words()) - - b = offset + self[0][0] - e = offset + self[-1][1] - return TimeStampedSegment(b, e, combined_text) + def __iter__(self): + return iter(self.segments) + def __getitem__(self, index): + return self.segments[index] + def __len__(self): + return len(self.segments) + + # TODO: a function from_whisper_res() +if __name__ == "__main__": + import doctest + doctest.testmod(verbose=True) \ No newline at end of file