From ab1c21130e9f3898c89a25226e8dd457591b3ae1 Mon Sep 17 00:00:00 2001
From: Fireblade
Date: Mon, 10 Feb 2025 21:45:05 -0500
Subject: [PATCH 01/10] Made the api use the normalizer, fixed the wrong
 version of espeak, added better normalization, improved the sentence
 splitting, fixed some formatting

---
 api/src/core/config.py                        |  5 +-
 api/src/inference/kokoro_v1.py                |  5 +-
 api/src/services/audio.py                     | 91 +++++++++++++++++--
 .../services/text_processing/normalizer.py    | 38 +++++++-
 .../text_processing/text_processor.py         | 27 +++---
 api/src/services/tts_service.py               | 22 ++++-
 docker/cpu/Dockerfile                         | 13 +--
 docker/gpu/Dockerfile                         | 16 ++--
 pyproject.toml                                |  3 +-
 start-gpu.bat                                 | 10 ++
 10 files changed, 187 insertions(+), 43 deletions(-)
 create mode 100644 start-gpu.bat

diff --git a/api/src/core/config.py b/api/src/core/config.py
index d2e369b..d361a5c 100644
--- a/api/src/core/config.py
+++ b/api/src/core/config.py
@@ -28,8 +28,11 @@ class Settings(BaseSettings):
     target_min_tokens: int = 175  # Target minimum tokens per chunk
     target_max_tokens: int = 250  # Target maximum tokens per chunk
     absolute_max_tokens: int = 450  # Absolute maximum tokens per chunk
+    advanced_text_normalization: bool = True  # Preprocesses the text before misaki, which leads to better results

-    gap_trim_ms: int = 250  # Amount to trim from streaming chunk ends in milliseconds
+    gap_trim_ms: int = 1  # Base amount to trim from streaming chunk ends in milliseconds
+    dynamic_gap_trim_padding_ms: int = 410  # Padding to add to dynamic gap trim
+    dynamic_gap_trim_padding_char_multiplier: dict[str,float] = {".":1,"!":0.9,"?":1,",":0.8}

     # Web Player Settings
     enable_web_player: bool = True  # Whether to serve the web player UI
diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py
index 9f5e206..248593e 100644
--- a/api/src/inference/kokoro_v1.py
+++ b/api/src/inference/kokoro_v1.py
@@ -144,7 +144,7 @@ class KokoroV1(BaseModelBackend):
             pipeline = self._get_pipeline(pipeline_lang_code)

             logger.debug(
-                f"Generating audio from tokens with lang_code '{pipeline_lang_code}': '{tokens[:100]}...'"
+                f"Generating audio from tokens with lang_code '{pipeline_lang_code}': '{tokens[:100]}{'...' if len(tokens) > 100 else ''}'"
             )
             for result in pipeline.generate_from_tokens(
                 tokens=tokens, voice=voice_path, speed=speed, model=self._model
@@ -192,7 +192,6 @@ class KokoroV1(BaseModelBackend):
         """
         if not self.is_loaded:
             raise RuntimeError("Model not loaded")
-
         try:
             # Memory management for GPU
             if self._device == "cuda":
@@ -237,7 +236,7 @@ class KokoroV1(BaseModelBackend):
             pipeline = self._get_pipeline(pipeline_lang_code)

             logger.debug(
-                f"Generating audio for text with lang_code '{pipeline_lang_code}': '{text[:100]}...'"
+                f"Generating audio for text with lang_code '{pipeline_lang_code}': '{text[:100]}{'...' 
if len(text) > 100 else ''}'"
             )
             for result in pipeline(
                 text, voice=voice_path, speed=speed, model=self._model
diff --git a/api/src/services/audio.py b/api/src/services/audio.py
index 2055c9f..64062b8 100644
--- a/api/src/services/audio.py
+++ b/api/src/services/audio.py
@@ -4,10 +4,11 @@ import struct
 from io import BytesIO

 import numpy as np
+import math
 import scipy.io.wavfile as wavfile
 import soundfile as sf
 from loguru import logger
 from pydub import AudioSegment

 from ..core.config import settings
 from .streaming_audio_writer import StreamingAudioWriter
@@ -20,23 +22,65 @@ class AudioNormalizer:
         self.chunk_trim_ms = settings.gap_trim_ms
         self.sample_rate = 24000  # Sample rate of the audio
         self.samples_to_trim = int(self.chunk_trim_ms * self.sample_rate / 1000)
+        self.samples_to_pad_start = int(50 * self.sample_rate / 1000)
+
+    def find_first_last_non_silent(self, audio_data: np.ndarray, chunk_text: str, speed: float, silence_threshold_db: int = -45, is_last_chunk: bool = False) -> tuple[int, int]:
+        """Finds the indices of the first and last non-silent samples in audio data.
+
+        Args:
+            audio_data: Input audio data as numpy array
+            chunk_text: The text sent to the model to generate the resulting speech
+            speed: The speaking speed of the voice
+            silence_threshold_db: How quiet audio has to be to be considered silent
+            is_last_chunk: Whether this is the last chunk
+
+        Returns:
+            A tuple of the start and end indices of the non-silent portion
+        """
+
+        pad_multiplier = 1
+        split_character = chunk_text.strip()
+        if len(split_character) > 0:
+            split_character = split_character[-1]
+            if split_character in settings.dynamic_gap_trim_padding_char_multiplier:
+                pad_multiplier = settings.dynamic_gap_trim_padding_char_multiplier[split_character]
+
+        if not is_last_chunk:
+            samples_to_pad_end = max(int((settings.dynamic_gap_trim_padding_ms * self.sample_rate * pad_multiplier) / 1000) - self.samples_to_pad_start, 0)
+        else:
+            samples_to_pad_end = self.samples_to_pad_start
+        # Convert dBFS threshold to amplitude
+        amplitude_threshold = np.iinfo(audio_data.dtype).max * (10 ** (silence_threshold_db / 20))
+        # Find the first samples above the silence threshold at the start and end of the audio
+        non_silent_index_start, non_silent_index_end = None, None
+
+        for x in range(0, len(audio_data)):
+            if audio_data[x] > amplitude_threshold:
+                non_silent_index_start = x
+                break
+
+        for x in range(len(audio_data) - 1, -1, -1):
+            if audio_data[x] > amplitude_threshold:
+                non_silent_index_end = x
+                break
+
+        # Handle the case where the entire audio is silent
+        if non_silent_index_start is None or non_silent_index_end is None:
+            return 0, len(audio_data)
+
+        return max(non_silent_index_start - self.samples_to_pad_start, 0), min(non_silent_index_end + math.ceil(samples_to_pad_end / speed), len(audio_data))

     async def normalize(self, audio_data: np.ndarray) -> np.ndarray:
-        """Convert audio data to int16 range and trim silence from start and end
+        """Convert audio data to int16 range

         Args:
             audio_data: Input audio data as numpy array
-
         Returns:
-            Normalized and trimmed audio data
+            Normalized audio data
         """
         if len(audio_data) == 0:
             raise ValueError("Empty audio data")

-        # Trim start and end if enough samples
-        if len(audio_data) > (2 * self.samples_to_trim):
-            audio_data = audio_data[self.samples_to_trim : -self.samples_to_trim]
-
         # Scale directly to int16 range with clipping
         return np.clip(audio_data * 32767, -32768, 32767).astype(np.int16)
@@ -71,6 +116,8 @@ 
class AudioService: audio_data: np.ndarray, sample_rate: int, output_format: str, + speed: float = 1, + chunk_text: str = "", is_first_chunk: bool = True, is_last_chunk: bool = False, normalizer: AudioNormalizer = None, @@ -81,6 +128,8 @@ class AudioService: audio_data: Numpy array of audio samples sample_rate: Sample rate of the audio output_format: Target format (wav, mp3, ogg, pcm) + speed: The speaking speed of the voice + chunk_text: The text sent to the model to generate the resulting speech is_first_chunk: Whether this is the first chunk is_last_chunk: Whether this is the last chunk normalizer: Optional AudioNormalizer instance for consistent normalization @@ -96,8 +145,10 @@ class AudioService: # Always normalize audio to ensure proper amplitude scaling if normalizer is None: normalizer = AudioNormalizer() + normalized_audio = await normalizer.normalize(audio_data) - + normalized_audio = AudioService.trim_audio(normalized_audio,chunk_text,speed,is_last_chunk,normalizer) + # Get or create format-specific writer writer_key = f"{output_format}_{sample_rate}" if is_first_chunk or writer_key not in AudioService._writers: @@ -123,3 +174,27 @@ class AudioService: raise ValueError( f"Failed to convert audio stream to {output_format}: {str(e)}" ) + @staticmethod + def trim_audio(audio_data: np.ndarray, chunk_text: str = "", speed: float = 1, is_last_chunk: bool = False, normalizer: AudioNormalizer = None) -> np.ndarray: + """Trim silence from start and end + + Args: + audio_data: Input audio data as numpy array + chunk_text: The text sent to the model to generate the resulting speech + speed: The speaking speed of the voice + is_last_chunk: Whether this is the last chunk + normalizer: Optional AudioNormalizer instance for consistent normalization + + Returns: + Trimmed audio data + """ + if normalizer is None: + normalizer = AudioNormalizer() + + # Trim start and end if enough samples + if len(audio_data) > (2 * normalizer.samples_to_trim): + audio_data = audio_data[normalizer.samples_to_trim : -normalizer.samples_to_trim] + + # Find non silent portion and trim + start_index,end_index=normalizer.find_first_last_non_silent(audio_data,chunk_text,speed,is_last_chunk=is_last_chunk) + return audio_data[start_index:end_index] \ No newline at end of file diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py index 383abbd..7c728fb 100644 --- a/api/src/services/text_processing/normalizer.py +++ b/api/src/services/text_processing/normalizer.py @@ -6,6 +6,7 @@ Converts them into a format suitable for text-to-speech processing. 
 import re
 from functools import lru_cache
+import inflect

 # Constants
 VALID_TLDS = [
@@ -50,6 +51,26 @@ VALID_TLDS = [
     "io",
 ]

+VALID_UNITS = {
+    "m":"meter", "cm":"centimeter", "mm":"millimeter", "km":"kilometer", "in":"inch", "ft":"foot", "yd":"yard", "mi":"mile", # Length
+    "g":"gram", "kg":"kilogram", "mg":"milligram", # Mass
+    "s":"second", "ms":"millisecond", "min":"minute", "h":"hour", # Time
+    "l":"liter", "ml":"milliliter", "cl":"centiliter", "dl":"deciliter", # Volume
+    "kph":"kilometer per hour", "mph":"mile per hour","mi/h":"mile per hour", "m/s":"meter per second", "km/h":"kilometer per hour", "mm/s":"millimeter per second","cm/s":"centimeter per second", "ft/s":"foot per second", # Speed
+    "°c":"degree celsius","c":"degree celsius", "°f":"degree fahrenheit","f":"degree fahrenheit", "k":"kelvin", # Temperature
+    "pa":"pascal", "kpa":"kilopascal", "mpa":"megapascal", "atm":"atmosphere", # Pressure
+    "hz":"hertz", "khz":"kilohertz", "mhz":"megahertz", "ghz":"gigahertz", # Frequency
+    "v":"volt", "kv":"kilovolt", "mv":"megavolt", # Voltage
+    "a":"amp", "ma":"megaamp", "ka":"kiloamp", # Current
+    "w":"watt", "kw":"kilowatt", "mw":"megawatt", # Power
+    "j":"joule", "kj":"kilojoule", "mj":"megajoule", # Energy
+    "Ω":"ohm", "kΩ":"kiloohm", "mΩ":"megaohm", # Resistance (Ohm)
+    "f":"farad", "µf":"microfarad", "nf":"nanofarad", "pf":"picofarad", # Capacitance
+    "b":"byte", "kb":"kilobyte", "mb":"megabyte", "gb":"gigabyte", "tb":"terabyte", "pb":"petabyte", # Data size
+    "kbps":"kilobyte per second","mbps":"megabyte per second","gbps":"gigabyte per second",
+    "px":"pixel" # CSS units
+}
+
 # Pre-compiled regex patterns for performance
 EMAIL_PATTERN = re.compile(
     r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-z]{2,}\b", re.IGNORECASE
@@ -61,6 +82,9 @@ URL_PATTERN = re.compile(
     re.IGNORECASE,
 )

+UNIT_PATTERN = re.compile(r"((?<!\w)([+-]?)(\d{1,3}(,\d{3})*|\d+)(\.\d+)?)\s*(" + "|".join(sorted(list(VALID_UNITS.keys()), reverse=True)) + r""")(?=[ !\"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{\|}~ \n]{1})""",re.IGNORECASE)
+
+INFLECT_ENGINE=inflect.engine()

 def split_num(num: re.Match[str]) -> str:
     """Handle number splitting for various formats"""
@@ -86,6 +110,13 @@ def split_num(num: re.Match[str]) -> str:
         return f"{left} oh {right}{s}"
     return f"{left} {right}{s}"

+def handle_units(u: re.Match[str]) -> str:
+    unit=u.group(6).strip()
+    if unit.lower() in VALID_UNITS:
+        unit=VALID_UNITS[unit.lower()].split(" ")
+        number=u.group(1).strip()
+        unit[0]=INFLECT_ENGINE.no(unit[0],number)
+    return " ".join(unit)

 def handle_money(m: re.Match[str]) -> str:
     """Convert money expressions to spoken form"""
@@ -187,14 +218,17 @@ def normalize_text(text: str) -> str:
     # Pre-process URLs first
     text = normalize_urls(text)

+    # Pre-process numbers with units
+    text=UNIT_PATTERN.sub(handle_units,text)
+
     # Replace quotes and brackets
     text = text.replace(chr(8216), "'").replace(chr(8217), "'")
     text = text.replace("«", chr(8220)).replace("»", chr(8221))
     text = text.replace(chr(8220), '"').replace(chr(8221), '"')
     text = text.replace("(", "«").replace(")", "»")

-    # Handle CJK punctuation
-    for a, b in zip("、。!,:;?", ",.!,:;?"):
+    # Handle CJK punctuation and some non-standard chars
+    for a, b in zip("、。!,:;?–", ",.!,:;?-"):
         text = text.replace(a, b + " ")

     # Clean up whitespace
diff --git a/api/src/services/text_processing/text_processor.py b/api/src/services/text_processing/text_processor.py
index 924b014..0e3cfcf 100644
--- a/api/src/services/text_processing/text_processor.py
+++ b/api/src/services/text_processing/text_processor.py
@@ -26,7 +26,7 @@ def process_text_chunk(
         List of token IDs
     """
     start_time = time.time()
-    
+
     if skip_phonemize:
         # Input is already phonemes, just 
tokenize
         t0 = time.time()
@@ -35,12 +35,11 @@ def process_text_chunk(
     else:
         # Normal text processing pipeline
         t0 = time.time()
-        normalized = normalize_text(text)
         t1 = time.time()

         t0 = time.time()
         phonemes = phonemize(
-            normalized, language, normalize=False
+            text, language, normalize=False
         )  # Already normalized
         t1 = time.time()

@@ -50,7 +49,7 @@ def process_text_chunk(
     total_time = time.time() - start_time
     logger.debug(
-        f"Total processing took {total_time * 1000:.2f}ms for chunk: '{text[:50]}...'"
+        f"Total processing took {total_time * 1000:.2f}ms for chunk: '{text[:50]}{'...' if len(text) > 50 else ''}'"
     )

     return tokens
@@ -61,7 +60,7 @@ async def yield_chunk(
 ) -> Tuple[str, List[int]]:
     """Yield a chunk with consistent logging."""
     logger.debug(
-        f"Yielding chunk {chunk_count}: '{text[:50]}...' ({len(tokens)} tokens)"
+        f"Yielding chunk {chunk_count}: '{text[:50]}{'...' if len(text) > 50 else ''}' ({len(tokens)} tokens)"
     )
     return text, tokens
@@ -88,9 +87,10 @@ def process_text(text: str, language: str = "a") -> List[int]:

 def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
     """Process all sentences and return info."""
-    sentences = re.split(r"([.!?;:])", text)
+    if settings.advanced_text_normalization:
+        text=normalize_text(text)
+    sentences = re.split(r"([.!?;:])(?=\s|$)", text)
     results = []
-
     for i in range(0, len(sentences), 2):
         sentence = sentences[i].strip()
         punct = sentences[i + 1] if i + 1 < len(sentences) else ""
@@ -128,7 +128,7 @@ async def smart_split(
                 chunk_text = " ".join(current_chunk)
                 chunk_count += 1
                 logger.debug(
-                    f"Yielding chunk {chunk_count}: '{chunk_text[:50]}...' ({current_count} tokens)"
+                    f"Yielding chunk {chunk_count}: '{chunk_text[:50]}{'...' if len(chunk_text) > 50 else ''}' ({current_count} tokens)"
                 )
                 yield chunk_text, current_tokens
                 current_chunk = []
@@ -149,6 +149,7 @@ async def smart_split(
                     continue

                 full_clause = clause + comma
+
                 tokens = process_text_chunk(full_clause)
                 count = len(tokens)

@@ -166,7 +167,7 @@ async def smart_split(
                         chunk_text = " ".join(clause_chunk)
                         chunk_count += 1
                         logger.debug(
-                            f"Yielding clause chunk {chunk_count}: '{chunk_text[:50]}...' ({clause_count} tokens)"
+                            f"Yielding clause chunk {chunk_count}: '{chunk_text[:50]}{'...' if len(chunk_text) > 50 else ''}' ({clause_count} tokens)"
                         )
                         yield chunk_text, clause_tokens
                         clause_chunk = [full_clause]
@@ -178,7 +179,7 @@ async def smart_split(
                     chunk_text = " ".join(clause_chunk)
                     chunk_count += 1
                     logger.debug(
-                        f"Yielding final clause chunk {chunk_count}: '{chunk_text[:50]}...' ({clause_count} tokens)"
+                        f"Yielding final clause chunk {chunk_count}: '{chunk_text[:50]}{'...' if len(chunk_text) > 50 else ''}' ({clause_count} tokens)"
                     )
                     yield chunk_text, clause_tokens
@@ -192,7 +193,7 @@ async def smart_split(
                 chunk_text = " ".join(current_chunk)
                 chunk_count += 1
                 logger.info(
-                    f"Yielding chunk {chunk_count}: '{chunk_text[:50]}...' ({current_count} tokens)"
+                    f"Yielding chunk {chunk_count}: '{chunk_text[:50]}{'...' if len(chunk_text) > 50 else ''}' ({current_count} tokens)"
                 )
                 yield chunk_text, current_tokens
                 current_chunk = [sentence]
@@ -217,7 +218,7 @@ async def smart_split(
             chunk_text = " ".join(current_chunk)
             chunk_count += 1
             logger.info(
-                f"Yielding chunk {chunk_count}: '{chunk_text[:50]}...' ({current_count} tokens)"
+                f"Yielding chunk {chunk_count}: '{chunk_text[:50]}{'...' 
if len(chunk_text) > 50 else ''}' ({current_count} tokens)"
             )
             yield chunk_text, current_tokens
             current_chunk = [sentence]
@@ -229,7 +230,7 @@ async def smart_split(
         chunk_text = " ".join(current_chunk)
         chunk_count += 1
         logger.info(
-            f"Yielding final chunk {chunk_count}: '{chunk_text[:50]}...' ({current_count} tokens)"
+            f"Yielding final chunk {chunk_count}: '{chunk_text[:50]}{'...' if len(chunk_text) > 50 else ''}' ({current_count} tokens)"
         )
         yield chunk_text, current_tokens

diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py
index 44fb709..3d533d9 100644
--- a/api/src/services/tts_service.py
+++ b/api/src/services/tts_service.py
@@ -67,6 +67,8 @@ class TTSService:
                     np.array([0], dtype=np.float32),  # Dummy data for type checking
                     24000,
                     output_format,
+                    speed,
+                    "",
                     is_first_chunk=False,
                     normalizer=normalizer,
                     is_last_chunk=True,
@@ -97,15 +99,22 @@ class TTSService:
                             chunk_audio,
                             24000,
                             output_format,
+                            speed,
+                            chunk_text,
                             is_first_chunk=is_first,
-                            normalizer=normalizer,
                             is_last_chunk=is_last,
+                            normalizer=normalizer,
                         )
                         yield converted
                     except Exception as e:
                         logger.error(f"Failed to convert audio: {str(e)}")
                 else:
-                    yield chunk_audio
+                    trimmed = AudioService.trim_audio(chunk_audio,
+                        chunk_text,
+                        speed,
+                        is_last,
+                        normalizer)
+                    yield trimmed
             else:
                 # For legacy backends, load voice tensor
                 voice_tensor = await self._voice_manager.load_voice(
@@ -130,6 +139,8 @@ class TTSService:
                             chunk_audio,
                             24000,
                             output_format,
+                            speed,
+                            chunk_text,
                             is_first_chunk=is_first,
                             normalizer=normalizer,
                             is_last_chunk=is_last,
@@ -138,7 +149,12 @@ class TTSService:
                     except Exception as e:
                         logger.error(f"Failed to convert audio: {str(e)}")
                 else:
-                    yield chunk_audio
+                    trimmed = AudioService.trim_audio(chunk_audio,
+                        chunk_text,
+                        speed,
+                        is_last,
+                        normalizer)
+                    yield trimmed

         except Exception as e:
             logger.error(f"Failed to process tokens: {str(e)}")
diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile
index 5d6f99e..369d008 100644
--- a/docker/cpu/Dockerfile
+++ b/docker/cpu/Dockerfile
@@ -9,10 +9,10 @@ RUN apt-get update && apt-get install -y \
     curl \
     ffmpeg \
     g++ \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/* \
-    && mkdir -p /usr/share/espeak-ng-data \
-    && ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/
+&& apt-get clean \
+&& rm -rf /var/lib/apt/lists/* \
+&& mkdir -p /usr/share/espeak-ng-data \
+&& ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/

 # Install UV using the installer script
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
@@ -20,7 +20,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
     mv /root/.local/bin/uvx /usr/local/bin/

 # Create non-root user and set up directories and permissions
-RUN useradd -m -u 1000 appuser && \
+RUN useradd -m -u 1001 appuser && \
     mkdir -p /app/api/src/models/v1_0 && \
     chown -R appuser:appuser /app

@@ -32,7 +32,7 @@ COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml

 # Install dependencies
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv venv && \
+    uv venv --python 3.11 && \
     uv sync --extra cpu

 # Copy project files including models
@@ -40,6 +40,7 @@ COPY --chown=appuser:appuser api ./api
 COPY --chown=appuser:appuser web ./web
 COPY --chown=appuser:appuser docker/scripts/ ./
 RUN chmod +x ./entrypoint.sh
+RUN sed -i 's/\r$//' ./entrypoint.sh

 # Set environment variables
 ENV PYTHONUNBUFFERED=1 \
diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile
index ce0f646..b19cf80 100644
--- a/docker/gpu/Dockerfile
+++ b/docker/gpu/Dockerfile
@@ -1,26 +1,29 @@
-FROM 
--platform=$BUILDPLATFORM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 +FROM --platform=$BUILDPLATFORM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu24.04 # Set non-interactive frontend ENV DEBIAN_FRONTEND=noninteractive # Install Python and other dependencies RUN apt-get update && apt-get install -y \ python3.10 \ - python3.10-venv \ + python3-venv \ espeak-ng \ espeak-ng-data \ git \ libsndfile1 \ curl \ ffmpeg \ - && apt-get clean && rm -rf /var/lib/apt/lists/* \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ && mkdir -p /usr/share/espeak-ng-data \ && ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ # Install UV using the installer script RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ mv /root/.local/bin/uv /usr/local/bin/ && \ - mv /root/.local/bin/uvx /usr/local/bin/ && \ - useradd -m -u 1000 appuser && \ + mv /root/.local/bin/uvx /usr/local/bin/ + +# Create non-root user and set up directories and permissions +RUN useradd -m -u 1001 appuser && \ mkdir -p /app/api/src/models/v1_0 && \ chown -R appuser:appuser /app @@ -32,7 +35,7 @@ COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml # Install dependencies with GPU extras (using cache mounts) RUN --mount=type=cache,target=/root/.cache/uv \ - uv venv && \ + uv venv --python 3.11 && \ uv sync --extra gpu # Copy project files including models and sync again @@ -40,6 +43,7 @@ COPY --chown=appuser:appuser api ./api COPY --chown=appuser:appuser web ./web COPY --chown=appuser:appuser docker/scripts/ ./ RUN chmod +x ./entrypoint.sh +RUN sed -i 's/\r$//' ./entrypoint.sh RUN --mount=type=cache,target=/root/.cache/uv \ uv sync --extra gpu diff --git a/pyproject.toml b/pyproject.toml index 336519e..4205fea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,8 @@ dependencies = [ "kokoro @ git+https://github.com/hexgrad/kokoro.git@31a2b6337b8c1b1418ef68c48142328f640da938", 'misaki[en,ja,ko,zh] @ git+https://github.com/hexgrad/misaki.git@ebc76c21b66c5fc4866ed0ec234047177b396170', "spacy==3.7.2", - "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl" + "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl", + "inflect>=7.5.0", ] [project.optional-dependencies] diff --git a/start-gpu.bat b/start-gpu.bat new file mode 100644 index 0000000..d7067e4 --- /dev/null +++ b/start-gpu.bat @@ -0,0 +1,10 @@ +set PYTHONUTF8=1 +set USE_GPU=true +set USE_ONNX=false +set PYTHONPATH=%PROJECT_ROOT%;%PROJECT_ROOT%\api +set MODEL_DIR=src\models +set VOICES_DIR=src\voices\v1_0 +set WEB_PLAYER_PATH=%PROJECT_ROOT%\web + +call uv pip install -e ".[gpu]" +call uv run uvicorn api.src.main:app --reload --host 0.0.0.0 --port 8880 \ No newline at end of file From 737e49a3f989c0c111a8de14107b52b1a4f48e68 Mon Sep 17 00:00:00 2001 From: Fireblade Date: Mon, 10 Feb 2025 21:49:05 -0500 Subject: [PATCH 02/10] removed testing start-gpu.bat --- start-gpu.bat | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 start-gpu.bat diff --git a/start-gpu.bat b/start-gpu.bat deleted file mode 100644 index d7067e4..0000000 --- a/start-gpu.bat +++ /dev/null @@ -1,10 +0,0 @@ -set PYTHONUTF8=1 -set USE_GPU=true -set USE_ONNX=false -set PYTHONPATH=%PROJECT_ROOT%;%PROJECT_ROOT%\api -set MODEL_DIR=src\models -set VOICES_DIR=src\voices\v1_0 -set WEB_PLAYER_PATH=%PROJECT_ROOT%\web - -call uv pip install -e ".[gpu]" -call uv run uvicorn api.src.main:app --reload --host 0.0.0.0 --port 
8880
\ No newline at end of file

From 737e49a3f989c0c111a8de14107b52b1a4f48e68 Mon Sep 17 00:00:00 2001
From: Fireblade
Date: Mon, 10 Feb 2025 21:49:05 -0500
Subject: [PATCH 02/10] removed testing start-gpu.bat

---
 start-gpu.bat | 10 ----------
 1 file changed, 10 deletions(-)
 delete mode 100644 start-gpu.bat

diff --git a/start-gpu.bat b/start-gpu.bat
deleted file mode 100644
index d7067e4..0000000
--- a/start-gpu.bat
+++ /dev/null
@@ -1,10 +0,0 @@
-set PYTHONUTF8=1
-set USE_GPU=true
-set USE_ONNX=false
-set PYTHONPATH=%PROJECT_ROOT%;%PROJECT_ROOT%\api
-set MODEL_DIR=src\models
-set VOICES_DIR=src\voices\v1_0
-set WEB_PLAYER_PATH=%PROJECT_ROOT%\web
-
-call uv pip install -e ".[gpu]"
-call uv run uvicorn api.src.main:app --reload --host 0.0.0.0 --port 8880
\ No newline at end of file

From 68cb097d9b47744e4523faee5adee48aee1a7e9a Mon Sep 17 00:00:00 2001
From: Fireblade2534
Date: Tue, 11 Feb 2025 14:05:14 +0000
Subject: [PATCH 03/10] Merged from origin/master

---
 api/src/core/config.py         |  1 +
 api/src/inference/kokoro_v1.py | 14 ++++++++++----
 docker/gpu/Dockerfile          | 33 ++++++++++++++++-----------------
 3 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/api/src/core/config.py b/api/src/core/config.py
index d361a5c..f5fd569 100644
--- a/api/src/core/config.py
+++ b/api/src/core/config.py
@@ -13,6 +13,7 @@ class Settings(BaseSettings):
     output_dir: str = "output"
     output_dir_size_limit_mb: float = 500.0  # Maximum size of output directory in MB
     default_voice: str = "af_heart"
+    default_voice_code: str | None = None  # If set, overrides the first letter of voice name, though API call param still takes precedence
     use_gpu: bool = True  # Whether to use GPU acceleration if available
     allow_local_voice_saving: bool = (
         False  # Whether to allow saving combined voices locally
diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py
index 248593e..99e76fa 100644
--- a/api/src/inference/kokoro_v1.py
+++ b/api/src/inference/kokoro_v1.py
@@ -139,8 +139,14 @@ class KokoroV1(BaseModelBackend):
                 await paths.save_voice_tensor(voice_tensor, temp_path)
                 voice_path = temp_path

-            # Use provided lang_code or get from voice name
-            pipeline_lang_code = lang_code if lang_code else voice_name[0].lower()
+            # Use provided lang_code, settings voice code override, or first letter of voice name
+            if lang_code:  # api is given priority
+                pipeline_lang_code = lang_code
+            elif settings.default_voice_code:  # settings is next priority
+                pipeline_lang_code = settings.default_voice_code
+            else:  # voice name is default/fallback
+                pipeline_lang_code = voice_name[0].lower()
+
             pipeline = self._get_pipeline(pipeline_lang_code)

             logger.debug(
@@ -231,8 +237,8 @@ class KokoroV1(BaseModelBackend):
                 await paths.save_voice_tensor(voice_tensor, temp_path)
                 voice_path = temp_path

-            # Use provided lang_code or get from voice name
-            pipeline_lang_code = lang_code if lang_code else voice_name[0].lower()
+            # Use provided lang_code, settings voice code override, or first letter of voice name
+            pipeline_lang_code = lang_code if lang_code else (settings.default_voice_code if settings.default_voice_code else voice_name[0].lower())

             pipeline = self._get_pipeline(pipeline_lang_code)

             logger.debug(
diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile
index b19cf80..7e4606e 100644
--- a/docker/gpu/Dockerfile
+++ b/docker/gpu/Dockerfile
@@ -12,51 +12,50 @@ RUN apt-get update && apt-get install -y \
     libsndfile1 \
     curl \
     ffmpeg \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/* \
+    g++ \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* \
     && mkdir -p /usr/share/espeak-ng-data \
     && ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/

 # Install UV using the installer script
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
     mv /root/.local/bin/uv /usr/local/bin/ && \
-    mv /root/.local/bin/uvx /usr/local/bin/
-
-# Create non-root user and set up directories and permissions
-RUN useradd -m -u 1001 appuser && \
-    mkdir -p /app/api/src/models/v1_0 && \
-    chown -R appuser:appuser /app
+    mv /root/.local/bin/uvx /usr/local/bin/

+# Create non-root user and set up directories and permissions
+RUN useradd -m -u 1000 appuser && \
+    mkdir -p /app/api/src/models/v1_0 && \
+    chown -R appuser:appuser /app
+
 USER appuser
 WORKDIR /app

 # Copy dependency files
 COPY 
--chown=appuser:appuser pyproject.toml ./pyproject.toml

+ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \
+    PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
+    ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
+
 # Install dependencies with GPU extras (using cache mounts)
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv venv --python 3.11 && \
     uv sync --extra gpu

-# Copy project files including models and sync again
+# Copy project files including models
 COPY --chown=appuser:appuser api ./api
 COPY --chown=appuser:appuser web ./web
 COPY --chown=appuser:appuser docker/scripts/ ./
 RUN chmod +x ./entrypoint.sh
-RUN sed -i 's/\r$//' ./entrypoint.sh
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --extra gpu
+

 # Set all environment variables in one go
 ENV PYTHONUNBUFFERED=1 \
     PYTHONPATH=/app:/app/api \
     PATH="/app/.venv/bin:$PATH" \
     UV_LINK_MODE=copy \
-    USE_GPU=true \
-    PHONEMIZER_ESPEAK_PATH=/usr/bin \
-    PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
-    ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
-
+    USE_GPU=true
+
ENV DOWNLOAD_MODEL=true
 # Download model if enabled
 RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \

From 64980b5bc8d75765192d7ec7f6adc049c4e457f3 Mon Sep 17 00:00:00 2001
From: Fireblade2534
Date: Tue, 11 Feb 2025 15:18:10 +0000
Subject: [PATCH 04/10] made it so bytes vs bits are translated correctly

---
 .../services/text_processing/normalizer.py    | 25 +++++++++++++------
 docker/cpu/Dockerfile                         |  5 ++--
 docker/gpu/Dockerfile                         |  2 +-
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py
index 7c728fb..ca26ffb 100644
--- a/api/src/services/text_processing/normalizer.py
+++ b/api/src/services/text_processing/normalizer.py
@@ -56,7 +56,7 @@ VALID_UNITS = {
     "g":"gram", "kg":"kilogram", "mg":"milligram", # Mass
     "s":"second", "ms":"millisecond", "min":"minute", "h":"hour", # Time
     "l":"liter", "ml":"milliliter", "cl":"centiliter", "dl":"deciliter", # Volume
-    "kph":"kilometer per hour", "mph":"mile per hour","mi/h":"mile per hour", "m/s":"meter per second", "km/h":"kilometer per hour", "mm/s":"millimeter per second","cm/s":"centimeter per second", "ft/s":"foot per second", # Speed
+    "kph":"kilometer per hour", "mph":"mile per hour","mi/h":"mile per hour", "m/s":"meter per second", "km/h":"kilometer per hour", "mm/s":"millimeter per second","cm/s":"centimeter per second", "ft/s":"foot per second","cm/h":"centimeter per hour", # Speed
     "°c":"degree celsius","c":"degree celsius", "°f":"degree fahrenheit","f":"degree fahrenheit", "k":"kelvin", # Temperature
     "pa":"pascal", "kpa":"kilopascal", "mpa":"megapascal", "atm":"atmosphere", # Pressure
     "hz":"hertz", "khz":"kilohertz", "mhz":"megahertz", "ghz":"gigahertz", # Frequency
@@ -66,11 +66,12 @@ VALID_UNITS = {
     "j":"joule", "kj":"kilojoule", "mj":"megajoule", # Energy
     "Ω":"ohm", "kΩ":"kiloohm", "mΩ":"megaohm", # Resistance (Ohm)
     "f":"farad", "µf":"microfarad", "nf":"nanofarad", "pf":"picofarad", # Capacitance
-    "b":"byte", "kb":"kilobyte", "mb":"megabyte", "gb":"gigabyte", "tb":"terabyte", "pb":"petabyte", # Data size
-    "kbps":"kilobyte per second","mbps":"megabyte per second","gbps":"gigabyte per second",
+    "b":"bit", "kb":"kilobit", "mb":"megabit", "gb":"gigabit", "tb":"terabit", "pb":"petabit", # Data size
+    "kbps":"kilobit per second","mbps":"megabit per second","gbps":"gigabit per second","tbps":"terabit per second",
     "px":"pixel" # CSS units
 }

+
 # Pre-compiled regex patterns for performance
 EMAIL_PATTERN = re.compile(
r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-z]{2,}\b", re.IGNORECASE @@ -82,7 +83,7 @@ URL_PATTERN = re.compile( re.IGNORECASE, ) -UNIT_PATTERN = re.compile(r"((??@\[\\\]^_`{\|}~ \n]{1})""",re.IGNORECASE) +UNIT_PATTERN = re.compile(r"((? str: return f"{left} {right}{s}" def handle_units(u: re.Match[str]) -> str: - unit=u.group(6).strip() - if unit.lower() in VALID_UNITS: - unit=VALID_UNITS[unit.lower()].split(" ") + unit_string=u.group(6).strip() + unit=unit_string + + print(unit) + if unit_string.lower() in VALID_UNITS: + unit=VALID_UNITS[unit_string.lower()].split(" ") + + # Handles the B vs b case + if unit[0].endswith("bit"): + b_case=unit_string[min(1,len(unit_string) - 1)] + if b_case == "B": + unit[0]=unit[0][:-3] + "byte" + number=u.group(1).strip() unit[0]=INFLECT_ENGINE.no(unit[0],number) return " ".join(unit) diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile index 369d008..d770a6c 100644 --- a/docker/cpu/Dockerfile +++ b/docker/cpu/Dockerfile @@ -20,7 +20,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ mv /root/.local/bin/uvx /usr/local/bin/ # Create non-root user and set up directories and permissions -RUN useradd -m -u 1001 appuser && \ +RUN useradd -m -u 1000 appuser && \ mkdir -p /app/api/src/models/v1_0 && \ chown -R appuser:appuser /app @@ -32,7 +32,7 @@ COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml # Install dependencies RUN --mount=type=cache,target=/root/.cache/uv \ - uv venv --python 3.11 && \ + uv venv --python 3.10 && \ uv sync --extra cpu # Copy project files including models @@ -40,7 +40,6 @@ COPY --chown=appuser:appuser api ./api COPY --chown=appuser:appuser web ./web COPY --chown=appuser:appuser docker/scripts/ ./ RUN chmod +x ./entrypoint.sh -RUN sed -i 's/\r$//' ./entrypoint.sh # Set environment variables ENV PYTHONUNBUFFERED=1 \ diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile index 7e4606e..46fe956 100644 --- a/docker/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -39,7 +39,7 @@ ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \ # Install dependencies with GPU extras (using cache mounts) RUN --mount=type=cache,target=/root/.cache/uv \ - uv venv --python 3.11 && \ + uv venv --python 3.10 && \ uv sync --extra gpu # Copy project files including models From d4f248b3a2a27113dece8176b16a66faf40f67cf Mon Sep 17 00:00:00 2001 From: Vincent Bailleau Date: Tue, 11 Feb 2025 19:10:01 +0100 Subject: [PATCH 05/10] Add Helm chart --- charts/kokoro-fastapi/.helmignore | 23 +++++ charts/kokoro-fastapi/Chart.yaml | 24 +++++ charts/kokoro-fastapi/templates/NOTES.txt | 22 +++++ charts/kokoro-fastapi/templates/_helpers.tpl | 62 ++++++++++++ charts/kokoro-fastapi/templates/hpa.yaml | 28 ++++++ charts/kokoro-fastapi/templates/ingress.yaml | 82 ++++++++++++++++ .../templates/kokoro-tts-deployment.yaml | 71 ++++++++++++++ .../templates/kokoro-tts-service.yaml | 15 +++ .../templates/serviceaccount.yaml | 12 +++ .../templates/tests/test-connection.yaml | 15 +++ charts/kokoro-fastapi/values.yaml | 94 +++++++++++++++++++ 11 files changed, 448 insertions(+) create mode 100644 charts/kokoro-fastapi/.helmignore create mode 100644 charts/kokoro-fastapi/Chart.yaml create mode 100644 charts/kokoro-fastapi/templates/NOTES.txt create mode 100644 charts/kokoro-fastapi/templates/_helpers.tpl create mode 100644 charts/kokoro-fastapi/templates/hpa.yaml create mode 100644 charts/kokoro-fastapi/templates/ingress.yaml create mode 100644 charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml create mode 100644 charts/kokoro-fastapi/templates/kokoro-tts-service.yaml 
create mode 100644 charts/kokoro-fastapi/templates/serviceaccount.yaml create mode 100644 charts/kokoro-fastapi/templates/tests/test-connection.yaml create mode 100644 charts/kokoro-fastapi/values.yaml diff --git a/charts/kokoro-fastapi/.helmignore b/charts/kokoro-fastapi/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/charts/kokoro-fastapi/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/kokoro-fastapi/Chart.yaml b/charts/kokoro-fastapi/Chart.yaml new file mode 100644 index 0000000..bd0cf5d --- /dev/null +++ b/charts/kokoro-fastapi/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: kokoro-fastapi +description: A Helm chart for kokoro-fastapi + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/charts/kokoro-fastapi/templates/NOTES.txt b/charts/kokoro-fastapi/templates/NOTES.txt new file mode 100644 index 0000000..88b8980 --- /dev/null +++ b/charts/kokoro-fastapi/templates/NOTES.txt @@ -0,0 +1,22 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "kokoro-fastapi.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . 
}} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/charts/kokoro-fastapi/templates/_helpers.tpl b/charts/kokoro-fastapi/templates/_helpers.tpl new file mode 100644 index 0000000..849bcd7 --- /dev/null +++ b/charts/kokoro-fastapi/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "kokoro-fastapi.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "kokoro-fastapi.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "kokoro-fastapi.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "kokoro-fastapi.labels" -}} +helm.sh/chart: {{ include "kokoro-fastapi.chart" . }} +{{ include "kokoro-fastapi.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "kokoro-fastapi.selectorLabels" -}} +app.kubernetes.io/name: {{ include "kokoro-fastapi.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "kokoro-fastapi.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "kokoro-fastapi.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/charts/kokoro-fastapi/templates/hpa.yaml b/charts/kokoro-fastapi/templates/hpa.yaml new file mode 100644 index 0000000..45ba60b --- /dev/null +++ b/charts/kokoro-fastapi/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "kokoro-fastapi.fullname" . }} + labels: + {{- include "kokoro-fastapi.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "kokoro-fastapi.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/charts/kokoro-fastapi/templates/ingress.yaml b/charts/kokoro-fastapi/templates/ingress.yaml new file mode 100644 index 0000000..09a8fb5 --- /dev/null +++ b/charts/kokoro-fastapi/templates/ingress.yaml @@ -0,0 +1,82 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "kokoro-fastapi.fullname" . -}} +{{- $svcPort := .Values.service.port -}} +{{- $rewriteTargets := (list) -}} +{{- with .Values.ingress.host }} + {{- range .endpoints }} + {{- $serviceName := default $fullName .serviceName -}} + {{- $rewrite := .rewrite | default "none" -}} + {{- if not (has $rewrite $rewriteTargets ) -}} + {{- $rewriteTargets = append $rewriteTargets $rewrite -}} + {{- end -}} + {{- end}} +{{- end }} +{{- range $key := $rewriteTargets }} +{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: +{{- if eq $key "none" }} + name: {{ $fullName }} +{{- else }} + name: {{ $fullName }}-{{ $expandedRewrite }} +{{- end }} + labels: + {{- include "kokoro-fastapi.labels" $ | nindent 4 }} + {{- if ne $key "none" }} + annotations: + nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2 + {{- end }} +spec: +{{- if $.Values.ingress.tls }} + tls: + {{- range $.Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} +{{- end }} + rules: + {{- with $.Values.ingress.host }} + - host: {{ .name | quote }} + http: + paths: + {{- range .endpoints }} + {{- $serviceName := default $fullName .serviceName -}} + {{- $servicePort := default (print "http") .servicePort -}} + {{- if eq ( .rewrite | default "none" ) $key }} + {{- range .paths }} + {{- if not (contains "@" .) }} + {{- if eq $key "none" }} + - path: {{ . }} + {{- else }} + - path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*) + {{- end }} + pathType: Prefix + backend: + service: + name: "{{ $fullName }}-{{ $serviceName }}" + port: + number: {{ $servicePort }} + {{- else }} + {{- $path := . -}} + {{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}} + {{- range $count, $e := until ($replicaCount|int) }} + - path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*) + pathType: Prefix + backend: + service: + name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}" + port: + number: {{ $servicePort }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} +--- +{{- end }} +{{- end }} diff --git a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml new file mode 100644 index 0000000..be1f67b --- /dev/null +++ b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml @@ -0,0 +1,71 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "kokoro-fastapi.fullname" . }}-kokoro-tts + labels: + {{- include "kokoro-fastapi.labels" . 
| nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.kokoroTTS.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "kokoro-fastapi.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "kokoro-fastapi.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + initContainers: [] + containers: + - name: kokoro-tts + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.kokoroTTS.repository }}:{{ .Values.kokoroTTS.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.kokoroTTS.pullPolicy }} + env: + - name: PYTHONPATH + value: "/app:/app/api" + - name: USE_GPU + value: "true" + - name: PYTHONUNBUFFERED + value: "1" + ports: + - name: kokoro-tts-http + containerPort: {{ .Values.kokoroTTS.port | default 8880 }} + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: kokoro-tts-http + readinessProbe: + httpGet: + path: /health + port: kokoro-tts-http + resources: + {{- toYaml .Values.kokoroTTS.resources | nindent 12 }} + volumeMounts: [] + volumes: [] + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/charts/kokoro-fastapi/templates/kokoro-tts-service.yaml b/charts/kokoro-fastapi/templates/kokoro-tts-service.yaml new file mode 100644 index 0000000..79c7890 --- /dev/null +++ b/charts/kokoro-fastapi/templates/kokoro-tts-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "kokoro-fastapi.fullname" . }}-kokoro-tts-service + labels: + {{- include "kokoro-fastapi.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.kokoroTTS.port }} + targetPort: kokoro-tts-http + protocol: TCP + name: kokoro-tts-http + selector: + {{- include "kokoro-fastapi.selectorLabels" . | nindent 4 }} diff --git a/charts/kokoro-fastapi/templates/serviceaccount.yaml b/charts/kokoro-fastapi/templates/serviceaccount.yaml new file mode 100644 index 0000000..f062f72 --- /dev/null +++ b/charts/kokoro-fastapi/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "kokoro-fastapi.serviceAccountName" . }} + labels: + {{- include "kokoro-fastapi.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/kokoro-fastapi/templates/tests/test-connection.yaml b/charts/kokoro-fastapi/templates/tests/test-connection.yaml new file mode 100644 index 0000000..120583f --- /dev/null +++ b/charts/kokoro-fastapi/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "kokoro-fastapi.fullname" . }}-test-connection" + labels: + {{- include "kokoro-fastapi.labels" . 
| nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/charts/kokoro-fastapi/values.yaml b/charts/kokoro-fastapi/values.yaml new file mode 100644 index 0000000..05419d9 --- /dev/null +++ b/charts/kokoro-fastapi/values.yaml @@ -0,0 +1,94 @@ +# Default values for kokoro-fastapi. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +images: + pullPolicy: "Always" + imagePullSecrets: [ ] + +nameOverride: "" +fullnameOverride: "" + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +service: + type: ClusterIP + +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + host: + name: kokoro.example.com + endpoints: + backend: + path: "/" + serviceName: "fastapi" + servicePort: 8880 + + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +kokoroTTS: + repository: "ghcr.io/remsky/kokoro-fastapi-gpu" + tag: "latest" + pullPolicy: Always + serviceName: "fastapi" + port: 8880 + replicaCount: 1 + resources: + limits: + nvidia.com/gpu: 1 + requests: + nvidia.com/gpu: 1 + + + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+autoscaling:
+  enabled: false
+  minReplicas: 1
+  maxReplicas: 100
+  targetCPUUtilizationPercentage: 80
+  # targetMemoryUtilizationPercentage: 80
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}

From 1e14fd8724f71d63c1c1b8e9fb7ac5b1c97527b2 Mon Sep 17 00:00:00 2001
From: zucher
Date: Tue, 11 Feb 2025 21:02:58 +0000
Subject: [PATCH 06/10] Fix chart ingress issue

---
 charts/kokoro-fastapi/values.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/charts/kokoro-fastapi/values.yaml b/charts/kokoro-fastapi/values.yaml
index 05419d9..0db2f95 100644
--- a/charts/kokoro-fastapi/values.yaml
+++ b/charts/kokoro-fastapi/values.yaml
@@ -45,8 +45,8 @@ ingress:
   host:
     name: kokoro.example.com
     endpoints:
-      backend:
-        path: "/"
+      - paths:
+          - "/"
         serviceName: "fastapi"
         servicePort: 8880

From 8ea8e68b61b20ce56bd592ed4fde02e637464760 Mon Sep 17 00:00:00 2001
From: Fireblade
Date: Tue, 11 Feb 2025 18:08:36 -0500
Subject: [PATCH 07/10] Fixed espeak backend erroring while initializing
 causing espeak fallback to silently fail

---
 docker/gpu/Dockerfile | 2 +-
 pyproject.toml        | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile
index 46fe956..44c1ba7 100644
--- a/docker/gpu/Dockerfile
+++ b/docker/gpu/Dockerfile
@@ -23,7 +23,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
     mv /root/.local/bin/uvx /usr/local/bin/

 # Create non-root user and set up directories and permissions
-RUN useradd -m -u 1000 appuser && \
+RUN useradd -m -u 1001 appuser && \
     mkdir -p /app/api/src/models/v1_0 && \
     chown -R appuser:appuser /app

diff --git a/pyproject.toml b/pyproject.toml
index 4205fea..acb5004 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,8 +18,6 @@ dependencies = [
     "scipy==1.14.1",
     # Audio processing
     "soundfile==0.13.0",
-    # Text processing
-    "phonemizer==3.3.0",
     "regex==2024.11.6",
     # Utilities
     "aiofiles==23.2.1",
@@ -38,6 +36,7 @@ dependencies = [
     "spacy==3.7.2",
     "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl",
     "inflect>=7.5.0",
+    "phonemizer-fork>=3.3.2",
 ]

 [project.optional-dependencies]

From 09de389b29ef0355e199e8ec6b0ab3e14eb6ca80 Mon Sep 17 00:00:00 2001
From: Fireblade
Date: Tue, 11 Feb 2025 19:09:35 -0500
Subject: [PATCH 08/10] Added normalization options

---
 api/src/routers/openai_compatible.py          |  2 ++
 .../services/text_processing/normalizer.py    | 33 +++++++++----------
 .../text_processing/text_processor.py         | 12 ++++---
 api/src/services/tts_service.py               |  5 +--
 api/src/structures/schemas.py                 | 12 ++++++-
 5 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py
index 5508d65..3be678a 100644
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@@ -3,6 +3,7 @@ import io
 import json
 import os
+import re
 import tempfile
 from typing import AsyncGenerator, Dict, List, Union

@@ -138,6 +139,7 @@ async def stream_audio_chunks(
         speed=request.speed,
         output_format=request.response_format,
         lang_code=request.lang_code or request.voice[0],
+        normalization_options=request.normalization_options
     ):
         # Check if client is still connected
         is_disconnected = client_request.is_disconnected
diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py
index ca26ffb..3bc9021 100644
--- 
a/api/src/services/text_processing/normalizer.py +++ b/api/src/services/text_processing/normalizer.py @@ -8,6 +8,8 @@ import re from functools import lru_cache import inflect +from ...structures.schemas import NormalizationOptions + # Constants VALID_TLDS = [ "com", @@ -112,10 +114,10 @@ def split_num(num: re.Match[str]) -> str: return f"{left} {right}{s}" def handle_units(u: re.Match[str]) -> str: + """Converts units to their full form""" unit_string=u.group(6).strip() unit=unit_string - print(unit) if unit_string.lower() in VALID_UNITS: unit=VALID_UNITS[unit_string.lower()].split(" ") @@ -213,24 +215,19 @@ def handle_url(u: re.Match[str]) -> str: return re.sub(r"\s+", " ", url).strip() -def normalize_urls(text: str) -> str: - """Pre-process URLs before other text normalization""" - # Handle email addresses first - text = EMAIL_PATTERN.sub(handle_email, text) - - # Handle URLs - text = URL_PATTERN.sub(handle_url, text) - - return text - - -def normalize_text(text: str) -> str: +def normalize_text(text: str,normalization_options: NormalizationOptions) -> str: """Normalize text for TTS processing""" - # Pre-process URLs first - text = normalize_urls(text) + # Handle email addresses first if enabled + if normalization_options.email_normalization: + text = EMAIL_PATTERN.sub(handle_email, text) - # Pre-process numbers with units - text=UNIT_PATTERN.sub(handle_units,text) + # Handle URLs if enabled + if normalization_options.url_normalization: + text = URL_PATTERN.sub(handle_url, text) + + # Pre-process numbers with units if enabled + if normalization_options.unit_normalization: + text=UNIT_PATTERN.sub(handle_units,text) # Replace quotes and brackets text = text.replace(chr(8216), "'").replace(chr(8217), "'") @@ -261,12 +258,14 @@ def normalize_text(text: str) -> str: text = re.sub( r"\d*\.\d+|\b\d{4}s?\b|(? 
List[int]:

 def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
     """Process all sentences and return info."""
-    if settings.advanced_text_normalization:
-        text=normalize_text(text)
     sentences = re.split(r"([.!?;:])(?=\s|$)", text)
     results = []
     for i in range(0, len(sentences), 2):
@@ -106,13 +104,19 @@ def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:

 async def smart_split(
-    text: str, max_tokens: int = settings.absolute_max_tokens
+    text: str,
+    max_tokens: int = settings.absolute_max_tokens,
+    normalization_options: NormalizationOptions = NormalizationOptions()
 ) -> AsyncGenerator[Tuple[str, List[int]], None]:
     """Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens."""
     start_time = time.time()
     chunk_count = 0
     logger.info(f"Starting smart split for {len(text)} chars")

+    # Normalize text
+    if settings.advanced_text_normalization and normalization_options.normalize:
+        text=normalize_text(text,normalization_options)
+
     # Process all sentences
     sentences = get_sentence_info(text)

diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py
index 3d533d9..ba4dcc4 100644
--- a/api/src/services/tts_service.py
+++ b/api/src/services/tts_service.py
@@ -18,7 +18,7 @@ from ..inference.voice_manager import get_manager as get_voice_manager
 from .audio import AudioNormalizer, AudioService
 from .text_processing import tokenize
 from .text_processing.text_processor import process_text_chunk, smart_split
-
+from ..structures.schemas import NormalizationOptions

 class TTSService:
     """Text-to-speech service."""
@@ -238,6 +238,7 @@ class TTSService:
         speed: float = 1.0,
         output_format: str = "wav",
         lang_code: Optional[str] = None,
+        normalization_options: Optional[NormalizationOptions] = NormalizationOptions()
     ) -> AsyncGenerator[bytes, None]:
         """Generate and stream audio chunks."""
         stream_normalizer = AudioNormalizer()
@@ -258,7 +259,7 @@ class TTSService:
             )

             # Process text in chunks with smart splitting
-            async for chunk_text, tokens in smart_split(text):
+            async for chunk_text, tokens in smart_split(text,normalization_options=normalization_options):
                 try:
                     # Process audio for chunk
                     async for result in self._process_chunk(
diff --git a/api/src/structures/schemas.py b/api/src/structures/schemas.py
index 4e76a69..491ae60 100644
--- a/api/src/structures/schemas.py
+++ b/api/src/structures/schemas.py
@@ -36,7 +36,13 @@ class CaptionedSpeechResponse(BaseModel):
     audio: bytes = Field(..., description="The generated audio data")
     words: List[WordTimestamp] = Field(..., description="Word-level timestamps")

-
+class NormalizationOptions(BaseModel):
+    """Options for the normalization system"""
+    normalize: bool = Field(default=True, description="Normalizes input text to make it easier for the model to say")
+    unit_normalization: bool = Field(default=False,description="Transforms units like 10KB to 10 kilobytes")
+    url_normalization: bool = Field(default=True, description="Changes urls so they can be properly pronounced by kokoro")
+    email_normalization: bool = Field(default=True, description="Changes emails so they can be properly pronounced by kokoro")
+
 class OpenAISpeechRequest(BaseModel):
     """Request schema for OpenAI-compatible speech endpoint"""

@@ -71,6 +77,10 @@ class OpenAISpeechRequest(BaseModel):
         default=None,
         description="Optional language code to use for text processing. 
         description="Optional language code to use for text processing. If not provided, will use first letter of voice name.",
     )
+    normalization_options: Optional[NormalizationOptions] = Field(
+        default= NormalizationOptions(),
+        description= "Options for the normalization system"
+    )
 
 
 class CaptionedSpeechRequest(BaseModel):

From 7cb5957848b79530f328ba8fd0720e2b40a2c267 Mon Sep 17 00:00:00 2001
From: Fireblade
Date: Tue, 11 Feb 2025 19:24:29 -0500
Subject: [PATCH 09/10] added optional pluralization normalization

---
 api/src/services/text_processing/normalizer.py | 6 +++++-
 api/src/structures/schemas.py                  | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py
index 3bc9021..ac04eca 100644
--- a/api/src/services/text_processing/normalizer.py
+++ b/api/src/services/text_processing/normalizer.py
@@ -229,6 +229,10 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
     if normalization_options.unit_normalization:
         text=UNIT_PATTERN.sub(handle_units,text)
 
+    # Replace optional pluralization
+    if normalization_options.optional_pluralization_normalization:
+        text = re.sub(r"\(s\)","s",text)
+
     # Replace quotes and brackets
     text = text.replace(chr(8216), "'").replace(chr(8217), "'")
     text = text.replace("«", chr(8220)).replace("»", chr(8221))
@@ -276,6 +280,6 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
     text = re.sub(
         r"(?:[A-Za-z]\.){2,} [a-z]", lambda m: m.group().replace(".", "-"), text
     )
-    text = re.sub(r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)
+    text = re.sub( r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)
 
     return text.strip()
diff --git a/api/src/structures/schemas.py b/api/src/structures/schemas.py
index 491ae60..7a6484c 100644
--- a/api/src/structures/schemas.py
+++ b/api/src/structures/schemas.py
@@ -42,6 +42,7 @@ class NormalizationOptions(BaseModel):
     unit_normalization: bool = Field(default=False,description="Transforms units like 10KB to 10 kilobytes")
     url_normalization: bool = Field(default=True, description="Changes urls so they can be properly pronounced by kokoro")
     email_normalization: bool = Field(default=True, description="Changes emails so they can be properly pronounced by kokoro")
+    optional_pluralization_normalization: bool = Field(default=True, description="Replaces (s) with s so some words get pronounced correctly")
 
 class OpenAISpeechRequest(BaseModel):
     """Request schema for OpenAI-compatible speech endpoint"""

From da1e280805c4816f91136c8ef4941283e9364d07 Mon Sep 17 00:00:00 2001
From: Fireblade
Date: Tue, 11 Feb 2025 21:30:41 -0500
Subject: [PATCH 10/10] fix tests

---
 api/tests/test_normalizer.py | 44 ++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/api/tests/test_normalizer.py b/api/tests/test_normalizer.py
index 9146252..52c525c 100644
--- a/api/tests/test_normalizer.py
+++ b/api/tests/test_normalizer.py
@@ -3,29 +3,29 @@ import pytest
 
 from api.src.services.text_processing.normalizer import normalize_text
-
+from api.src.structures.schemas import NormalizationOptions
 
 def test_url_protocols():
     """Test URL protocol handling"""
     assert (
-        normalize_text("Check out https://example.com")
+        normalize_text("Check out https://example.com",normalization_options=NormalizationOptions())
        == "Check out https example dot com"
     )
-    assert normalize_text("Visit http://site.com") == "Visit http site dot com"
+    assert normalize_text("Visit http://site.com",normalization_options=NormalizationOptions()) == "Visit http site dot com"
     assert (
-        normalize_text("Go to https://test.org/path")
+        normalize_text("Go to https://test.org/path",normalization_options=NormalizationOptions())
         == "Go to https test dot org slash path"
     )
 
 
 def test_url_www():
     """Test www prefix handling"""
-    assert normalize_text("Go to www.example.com") == "Go to www example dot com"
+    assert normalize_text("Go to www.example.com",normalization_options=NormalizationOptions()) == "Go to www example dot com"
     assert (
-        normalize_text("Visit www.test.org/docs") == "Visit www test dot org slash docs"
+        normalize_text("Visit www.test.org/docs",normalization_options=NormalizationOptions()) == "Visit www test dot org slash docs"
     )
     assert (
-        normalize_text("Check www.site.com?q=test")
+        normalize_text("Check www.site.com?q=test",normalization_options=NormalizationOptions())
         == "Check www site dot com question-mark q equals test"
     )
 
@@ -33,15 +33,15 @@ def test_url_www():
 def test_url_localhost():
     """Test localhost URL handling"""
     assert (
-        normalize_text("Running on localhost:7860")
+        normalize_text("Running on localhost:7860",normalization_options=NormalizationOptions())
         == "Running on localhost colon 78 60"
     )
     assert (
-        normalize_text("Server at localhost:8080/api")
+        normalize_text("Server at localhost:8080/api",normalization_options=NormalizationOptions())
         == "Server at localhost colon 80 80 slash api"
     )
     assert (
-        normalize_text("Test localhost:3000/test?v=1")
+        normalize_text("Test localhost:3000/test?v=1",normalization_options=NormalizationOptions())
         == "Test localhost colon 3000 slash test question-mark v equals 1"
     )
 
@@ -49,43 +49,43 @@ def test_url_localhost():
 def test_url_ip_addresses():
     """Test IP address URL handling"""
     assert (
-        normalize_text("Access 0.0.0.0:9090/test")
+        normalize_text("Access 0.0.0.0:9090/test",normalization_options=NormalizationOptions())
         == "Access 0 dot 0 dot 0 dot 0 colon 90 90 slash test"
     )
     assert (
-        normalize_text("API at 192.168.1.1:8000")
+        normalize_text("API at 192.168.1.1:8000",normalization_options=NormalizationOptions())
         == "API at 192 dot 168 dot 1 dot 1 colon 8000"
     )
-    assert normalize_text("Server 127.0.0.1") == "Server 127 dot 0 dot 0 dot 1"
+    assert normalize_text("Server 127.0.0.1",normalization_options=NormalizationOptions()) == "Server 127 dot 0 dot 0 dot 1"
 
 
 def test_url_raw_domains():
     """Test raw domain handling"""
     assert (
-        normalize_text("Visit google.com/search") == "Visit google dot com slash search"
+        normalize_text("Visit google.com/search",normalization_options=NormalizationOptions()) == "Visit google dot com slash search"
     )
     assert (
-        normalize_text("Go to example.com/path?q=test")
+        normalize_text("Go to example.com/path?q=test",normalization_options=NormalizationOptions())
         == "Go to example dot com slash path question-mark q equals test"
     )
-    assert normalize_text("Check docs.test.com") == "Check docs dot test dot com"
+    assert normalize_text("Check docs.test.com",normalization_options=NormalizationOptions()) == "Check docs dot test dot com"
 
 
 def test_url_email_addresses():
     """Test email address handling"""
     assert (
-        normalize_text("Email me at user@example.com")
+        normalize_text("Email me at user@example.com",normalization_options=NormalizationOptions())
         == "Email me at user at example dot com"
     )
-    assert normalize_text("Contact admin@test.org") == "Contact admin at test dot org"
+    assert normalize_text("Contact admin@test.org",normalization_options=NormalizationOptions()) == "Contact admin at test dot org"
     assert (
-        normalize_text("Send to test.user@site.com")
+        normalize_text("Send to test.user@site.com",normalization_options=NormalizationOptions())
         == "Send to test dot user at site dot com"
     )
 
 
 def test_non_url_text():
     """Test that non-URL text is unaffected"""
-    assert normalize_text("This is not.a.url text") == "This is not-a-url text"
-    assert normalize_text("Hello, how are you today?") == "Hello, how are you today?"
-    assert normalize_text("It costs $50.") == "It costs 50 dollars."
+    assert normalize_text("This is not.a.url text",normalization_options=NormalizationOptions()) == "This is not-a-url text"
+    assert normalize_text("Hello, how are you today?",normalization_options=NormalizationOptions()) == "Hello, how are you today?"
+    assert normalize_text("It costs $50.",normalization_options=NormalizationOptions()) == "It costs 50 dollars."
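
Usage sketch: the snippet below shows how the pieces introduced in this series
fit together, using the same import paths as the tests above. It is a minimal
illustration rather than part of the patches; the expected output in the
comments assumes VALID_UNITS maps "kb" to "kilobytes", as the
unit_normalization field description suggests.

    from api.src.services.text_processing.normalizer import normalize_text
    from api.src.structures.schemas import NormalizationOptions

    # unit_normalization defaults to False, so enable it explicitly;
    # url_normalization and email_normalization default to True.
    opts = NormalizationOptions(unit_normalization=True)

    text = "Download 10KB from www.example.com or email admin@test.org"
    print(normalize_text(text, opts))
    # Roughly: "Download 10 kilobytes from www example dot com
    # or email admin at test dot org"

At the API level, the same options ride along on the new normalization_options
field of OpenAISpeechRequest; TTSService.generate_audio_stream hands them to
smart_split, which applies normalize_text (gated by the
advanced_text_normalization setting and the per-request normalize flag) before
chunking the text for the model.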