From 9c0e328318d4fc4a73cb042334d7101b2635744e Mon Sep 17 00:00:00 2001
From: Fireblade
Date: Sun, 16 Feb 2025 14:16:18 -0500
Subject: [PATCH] Skip text normalization for non-English languages, as it
 only supports English

---
 api/src/services/text_processing/text_processor.py | 10 +++++++---
 api/src/services/tts_service.py                     |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/api/src/services/text_processing/text_processor.py b/api/src/services/text_processing/text_processor.py
index 4466cca..018cc51 100644
--- a/api/src/services/text_processing/text_processor.py
+++ b/api/src/services/text_processing/text_processor.py
@@ -106,6 +106,7 @@ def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
 async def smart_split(
     text: str,
     max_tokens: int = settings.absolute_max_tokens,
+    lang_code: str = "a",
     normalization_options: NormalizationOptions = NormalizationOptions()
 ) -> AsyncGenerator[Tuple[str, List[int]], None]:
     """Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens."""
@@ -113,9 +114,12 @@ async def smart_split(
     chunk_count = 0
     logger.info(f"Starting smart split for {len(text)} chars")
 
-    # Normilize text
+    # Normalize text
     if settings.advanced_text_normalization and normalization_options.normalize:
-        text=normalize_text(text,normalization_options)
+        if lang_code in ["a", "b", "en-us", "en-gb"]:
+            text = normalize_text(text, normalization_options)
+        else:
+            logger.info("Skipping text normalization as it is only supported for English")
 
     # Process all sentences
     sentences = get_sentence_info(text)
@@ -241,4 +245,4 @@ async def smart_split(
     total_time = time.time() - start_time
     logger.info(
         f"Split completed in {total_time * 1000:.2f}ms, produced {chunk_count} chunks"
-    )
+    )
\ No newline at end of file
diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py
index cb94f94..f1eb627 100644
--- a/api/src/services/tts_service.py
+++ b/api/src/services/tts_service.py
@@ -264,7 +264,7 @@ class TTSService:
 
             # Process text in chunks with smart splitting
-            async for chunk_text, tokens in smart_split(text,normalization_options=normalization_options):
+            async for chunk_text, tokens in smart_split(text, lang_code=lang_code, normalization_options=normalization_options):
                 try:
                     # Process audio for chunk
                     async for result, chunk_data in self._process_chunk(
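
Note: as a rough illustration of the new parameter (not part of the patch itself), the sketch below consumes the patched smart_split with a non-English lang_code, so the normalization branch is skipped and the raw text is chunked as-is. The import path simply mirrors the file location shown above, and "e" is an assumed non-English language code; adjust both to the actual project layout and supported codes.

    # Hypothetical usage sketch of the patched smart_split; the module path and
    # the non-English lang_code "e" are assumptions, not part of this patch.
    import asyncio

    from api.src.services.text_processing.text_processor import smart_split


    async def demo() -> None:
        text = "Hola, como estas? Este es un texto de prueba."
        # With a non-English lang_code, normalization is bypassed and the text
        # is split into token chunks directly.
        async for chunk_text, tokens in smart_split(text, lang_code="e"):
            print(f"{len(tokens)} tokens: {chunk_text[:40]!r}")


    asyncio.run(demo())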