clearer log messages for sentence segmentation

This commit is contained in:
Silas Kieser
2025-01-21 11:18:54 +01:00
parent ce56264241
commit 42d2784c20
2 changed files with 10 additions and 4 deletions

View File

@@ -194,10 +194,16 @@ class OnlineASRProcessor:
def chunk_completed_sentence(self):
if self.commited == []:
return
logger.debug("COMPLETED SENTENCE: ", [s[2] for s in self.commited])
raw_text = self.asr.sep.join([s[2] for s in self.commited])
logger.debug(f"[Sentence-segmentation] Raw Text: {raw_text}")
sents = self.words_to_sentences(self.commited)
for s in sents:
logger.debug(f"\t\tSENT: {s}")
logger.debug(f"[Sentence-segmentation] completed sentence: {s}")
if len(sents) < 2:
return
while len(sents) > 2:
@@ -205,7 +211,7 @@ class OnlineASRProcessor:
# we will continue with audio processing at this timestamp
chunk_at = sents[-2][1]
logger.debug(f"--- sentence chunked at {chunk_at:2.2f}")
logger.debug(f"[Sentence-segmentation]: sentence chunked at {chunk_at:2.2f}")
self.chunk_at(chunk_at)
def chunk_completed_segment(self, res):

View File

@@ -58,7 +58,7 @@ def create_tokenizer(lan):
lan
in "as ba bo br bs fo haw hr ht jw lb ln lo mi nn oc sa sd sn so su sw tk tl tt".split()
):
logger.debug(
logger.warning(
f"{lan} code is not supported by wtpsplit. Going to use None lang_code option."
)
lan = None