Skip text normalization when using languages other than English, as normalization only supports English

This commit is contained in:
Fireblade 2025-02-16 14:16:18 -05:00
parent 4802128943
commit 9c0e328318
2 changed files with 8 additions and 4 deletions

View file

@ -106,6 +106,7 @@ def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
async def smart_split( async def smart_split(
text: str, text: str,
max_tokens: int = settings.absolute_max_tokens, max_tokens: int = settings.absolute_max_tokens,
lang_code: str = "a",
normalization_options: NormalizationOptions = NormalizationOptions() normalization_options: NormalizationOptions = NormalizationOptions()
) -> AsyncGenerator[Tuple[str, List[int]], None]: ) -> AsyncGenerator[Tuple[str, List[int]], None]:
"""Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens.""" """Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens."""
@ -113,9 +114,12 @@ async def smart_split(
chunk_count = 0 chunk_count = 0
logger.info(f"Starting smart split for {len(text)} chars") logger.info(f"Starting smart split for {len(text)} chars")
# Normilize text # Normalize text
if settings.advanced_text_normalization and normalization_options.normalize: if settings.advanced_text_normalization and normalization_options.normalize:
text=normalize_text(text,normalization_options) if lang_code in ["a","b","en-us","en-gb"]:
text=normalize_text(text,normalization_options)
else:
logger.info("Skipping text normalization as it is only supported for english")
# Process all sentences # Process all sentences
sentences = get_sentence_info(text) sentences = get_sentence_info(text)
@ -241,4 +245,4 @@ async def smart_split(
total_time = time.time() - start_time total_time = time.time() - start_time
logger.info( logger.info(
f"Split completed in {total_time * 1000:.2f}ms, produced {chunk_count} chunks" f"Split completed in {total_time * 1000:.2f}ms, produced {chunk_count} chunks"
) )

View file

@ -264,7 +264,7 @@ class TTSService:
# Process text in chunks with smart splitting # Process text in chunks with smart splitting
async for chunk_text, tokens in smart_split(text,normalization_options=normalization_options): async for chunk_text, tokens in smart_split(text,lang_code=lang_code,normalization_options=normalization_options):
try: try:
# Process audio for chunk # Process audio for chunk
async for result, chunk_data in self._process_chunk( async for result, chunk_data in self._process_chunk(