Mirror of https://github.com/remsky/Kokoro-FastAPI.git (synced 2025-08-05 16:48:53 +00:00)

Merge branch 'remsky:master' into fixes

Commit f2c5bc1b71: 3 changed files with 24 additions and 6 deletions
@@ -26,8 +26,8 @@ class StreamingAudioWriter:
         if self.format != "pcm":
             self.output_buffer = BytesIO()
             self.container = av.open(self.output_buffer, mode="w", format=self.format)
-            #print(av.codecs_available)
             self.stream = self.container.add_stream(codec_map[self.format],sample_rate=self.sample_rate,layout='mono' if self.channels == 1 else 'stereo')
+            self.stream.bit_rate = 128000
         else:
             raise ValueError(f"Unsupported format: {format}")

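The functional change in this hunk is the fixed encoder bitrate. A minimal standalone sketch, mirroring the `add_stream` call above but with assumed values (MP3 output, mono 24 kHz) in place of the writer's `codec_map` and constructor state:

    import av
    from io import BytesIO

    sample_rate, channels = 24000, 1  # assumed values for illustration

    output_buffer = BytesIO()
    container = av.open(output_buffer, mode="w", format="mp3")
    stream = container.add_stream(
        "mp3",  # stand-in for codec_map[self.format]
        sample_rate=sample_rate,
        layout="mono" if channels == 1 else "stereo",
    )
    # Without this, the encoder falls back to its own default bitrate;
    # the commit pins it at 128 kbit/s for consistent output size and quality.
    stream.bit_rate = 128000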
@@ -270,7 +270,6 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
     text = text.replace(chr(8216), "'").replace(chr(8217), "'")
     text = text.replace("«", chr(8220)).replace("»", chr(8221))
     text = text.replace(chr(8220), '"').replace(chr(8221), '"')
-    text = text.replace("(", "«").replace(")", "»")

     # Handle CJK punctuation and some non standard chars
     for a, b in zip("、。!,:;?–", ",.!,:;?-"):

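The surviving replacements collapse typographic quotes down to ASCII in two steps; only the line that rewrote parentheses as guillemets is deleted, so `(` and `)` now pass through normalization untouched. A standalone sketch of the remaining chain (`collapse_quotes` is a stand-in name for this slice of `normalize_text`):

    def collapse_quotes(text: str) -> str:
        # U+2018/U+2019 (curly single quotes) -> ASCII apostrophe
        text = text.replace(chr(8216), "'").replace(chr(8217), "'")
        # Guillemets -> curly double quotes (U+201C/U+201D) ...
        text = text.replace("«", chr(8220)).replace("»", chr(8221))
        # ... which the next line folds down to ASCII double quotes
        text = text.replace(chr(8220), '"').replace(chr(8221), '"')
        return text

    print(collapse_quotes("«Hi», it’s me (really)"))  # "Hi", it's me (really)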
@@ -2,7 +2,7 @@

 import re
 import time
-from typing import AsyncGenerator, List, Tuple
+from typing import AsyncGenerator, Dict, List, Tuple

 from loguru import logger

@@ -12,6 +12,9 @@ from .phonemizer import phonemize
 from .vocabulary import tokenize
 from ...structures.schemas import NormalizationOptions

+# Pre-compiled regex patterns for performance
+CUSTOM_PHONEMES = re.compile(r"(\[([^\]]|\n)*?\])(\(\/([^\/)]|\n)*?\/\))")
+
 def process_text_chunk(
     text: str, language: str = "a", skip_phonemize: bool = False
 ) -> List[int]:

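The new `CUSTOM_PHONEMES` pattern captures inline pronunciation markup of the form `[text](/phonemes/)`, with the bracketed text in group 1 and the `(/.../)` phoneme block in group 3. A quick check with an invented sample sentence:

    import re

    CUSTOM_PHONEMES = re.compile(r"(\[([^\]]|\n)*?\])(\(\/([^\/)]|\n)*?\/\))")

    sample = "Say [Kokoro](/kˈOkəɹO/) slowly."
    m = CUSTOM_PHONEMES.search(sample)
    print(m.group(1))  # [Kokoro]
    print(m.group(3))  # (/kˈOkəɹO/)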
@@ -85,12 +88,21 @@ def process_text(text: str, language: str = "a") -> List[int]:
     return process_text_chunk(text, language)

-def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
+def get_sentence_info(text: str, custom_phenomes_list: Dict[str, str]) -> List[Tuple[str, List[int], int]]:
     """Process all sentences and return info."""
     sentences = re.split(r"([.!?;:])(?=\s|$)", text)
+    phoneme_length, min_value = len(custom_phenomes_list), 0

     results = []
     for i in range(0, len(sentences), 2):
         sentence = sentences[i].strip()
+        for replaced in range(min_value, phoneme_length):
+            current_id = f"</|custom_phonemes_{replaced}|/>"
+            if current_id in sentence:
+                sentence = sentence.replace(current_id, custom_phenomes_list.pop(current_id))
+                min_value += 1
+
         punct = sentences[i + 1] if i + 1 < len(sentences) else ""

         if not sentence:

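In isolation, the loop added to `get_sentence_info` swaps each placeholder id back for its stashed markup: `pop` ensures an id is consumed exactly once, and `min_value` lets later sentences skip ids already restored. A minimal sketch with made-up entries:

    custom_phenomes_list = {
        "</|custom_phonemes_0|/>": "[Kokoro](/kˈOkəɹO/)",
        "</|custom_phonemes_1|/>": "[FastAPI](/fˈæstæpɪ/)",
    }
    sentence = "Try </|custom_phonemes_0|/> with </|custom_phonemes_1|/>."
    phoneme_length, min_value = len(custom_phenomes_list), 0

    for replaced in range(min_value, phoneme_length):
        current_id = f"</|custom_phonemes_{replaced}|/>"
        if current_id in sentence:
            # pop() restores the original markup and retires the placeholder id
            sentence = sentence.replace(current_id, custom_phenomes_list.pop(current_id))
            min_value += 1

    print(sentence)  # Try [Kokoro](/kˈOkəɹO/) with [FastAPI](/fˈæstæpɪ/).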
@@ -102,6 +114,10 @@ def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
     return results

+def handle_custom_phonemes(s: re.Match[str], phenomes_list: Dict[str,str]) -> str:
+    latest_id = f"</|custom_phonemes_{len(phenomes_list)}|/>"
+    phenomes_list[latest_id] = s.group(0).strip()
+    return latest_id

 async def smart_split(
     text: str,

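`handle_custom_phonemes` is the stashing side of that exchange: used as a `re.sub` callback, it replaces each match with a sequential placeholder id and records the original markup. A sketch of this first half of the round trip, reusing the pattern from above:

    import re
    from typing import Dict

    CUSTOM_PHONEMES = re.compile(r"(\[([^\]]|\n)*?\])(\(\/([^\/)]|\n)*?\/\))")

    def handle_custom_phonemes(s: re.Match, phenomes_list: Dict[str, str]) -> str:
        latest_id = f"</|custom_phonemes_{len(phenomes_list)}|/>"
        phenomes_list[latest_id] = s.group(0).strip()
        return latest_id

    stash: Dict[str, str] = {}
    masked = CUSTOM_PHONEMES.sub(
        lambda s: handle_custom_phonemes(s, stash),
        "Read [Kokoro](/kˈOkəɹO/) aloud.",
    )
    print(masked)  # Read </|custom_phonemes_0|/> aloud.
    print(stash)   # {'</|custom_phonemes_0|/>': '[Kokoro](/kˈOkəɹO/)'}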
@@ -114,15 +130,18 @@ async def smart_split(
     chunk_count = 0
     logger.info(f"Starting smart split for {len(text)} chars")

+    custom_phoneme_list = {}
+
     # Normalize text
     if settings.advanced_text_normalization and normalization_options.normalize:
         if lang_code in ["a","b","en-us","en-gb"]:
+            text = CUSTOM_PHONEMES.sub(lambda s: handle_custom_phonemes(s, custom_phoneme_list), text)
             text=normalize_text(text,normalization_options)
         else:
             logger.info("Skipping text normalization as it is only supported for english")

     # Process all sentences
-    sentences = get_sentence_info(text)
+    sentences = get_sentence_info(text, custom_phoneme_list)

     current_chunk = []
     current_tokens = []

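Taken together, the changes to this file form one mask-then-restore pipeline: `smart_split` stashes custom phoneme markup behind placeholder ids before `normalize_text` runs, so normalization cannot mangle the bracket and parenthesis syntax, and `get_sentence_info` swaps the markup back in after the text has been split into sentences. The placeholder ids appear to be built from characters the English normalizer is unlikely to rewrite.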