Skip text normalization when using languages other than English, since normalization only supports English

This commit is contained in:
Fireblade 2025-02-16 14:16:18 -05:00
parent 4802128943
commit 9c0e328318
2 changed files with 8 additions and 4 deletions

View file

@ -106,6 +106,7 @@ def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
async def smart_split(
text: str,
max_tokens: int = settings.absolute_max_tokens,
lang_code: str = "a",
normalization_options: NormalizationOptions = NormalizationOptions()
) -> AsyncGenerator[Tuple[str, List[int]], None]:
"""Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens."""
@ -113,9 +114,12 @@ async def smart_split(
chunk_count = 0
logger.info(f"Starting smart split for {len(text)} chars")
# Normilize text
# Normalize text
if settings.advanced_text_normalization and normalization_options.normalize:
text=normalize_text(text,normalization_options)
if lang_code in ["a","b","en-us","en-gb"]:
text=normalize_text(text,normalization_options)
else:
logger.info("Skipping text normalization as it is only supported for english")
# Process all sentences
sentences = get_sentence_info(text)
@ -241,4 +245,4 @@ async def smart_split(
total_time = time.time() - start_time
logger.info(
f"Split completed in {total_time * 1000:.2f}ms, produced {chunk_count} chunks"
)
)

View file

@ -264,7 +264,7 @@ class TTSService:
# Process text in chunks with smart splitting
async for chunk_text, tokens in smart_split(text,normalization_options=normalization_options):
async for chunk_text, tokens in smart_split(text,lang_code=lang_code,normalization_options=normalization_options):
try:
# Process audio for chunk
async for result, chunk_data in self._process_chunk(