Swapped generator to preprocessing

This commit is contained in:
remsky 2025-01-04 22:23:59 -07:00
parent e799f0c7c1
commit 4c6cd83f85
23 changed files with 955 additions and 127 deletions

BIN
.coverage

Binary file not shown.

2
.gitignore vendored
View file

@ -25,3 +25,5 @@ examples/assorted_checks/test_voices/output/*
examples/assorted_checks/test_formats/output/*
examples/assorted_checks/benchmarks/output_audio_stream/*
ui/RepoScreenshot.png
examples/assorted_checks/benchmarks/output_audio_stream_openai/*

View file

@ -24,16 +24,16 @@ async def lifespan(app: FastAPI):
# Initialize the main model with warm-up
voicepack_count = TTSModel.setup()
# boundary = "█████╗"*9
boundary = "" * 54
boundary = "" * 30
startup_msg =f"""
{boundary}
{boundary}
"""

View file

@ -57,10 +57,8 @@ async def create_speech(
"pcm": "audio/pcm",
}.get(request.response_format, f"audio/{request.response_format}")
# Check if streaming is requested via header
is_streaming = x_raw_response == "stream"
if is_streaming:
# Check if streaming is requested (default for OpenAI client)
if request.stream:
# Stream audio chunks as they're generated
return StreamingResponse(
stream_audio_chunks(tts_service, request),

View file

@ -49,7 +49,7 @@ def handle_decimal(num: re.Match) -> str:
a, b = num.group().split(".")
return " point ".join([a, " ".join(b)])
@lru_cache(maxsize=1000) # Cache normalized text results
# @lru_cache(maxsize=1000) # Cache normalized text results
def normalize_text(text: str) -> str:
"""Normalize text for TTS processing

View file

@ -20,11 +20,40 @@ class TTSService:
def __init__(self, output_dir: str = None):
self.output_dir = output_dir
def _split_text(self, text: str) -> List[str]:
"""Split text into sentences"""
def _split_text(self, text: str):
"""Generate text chunks one at a time, splitting on natural pause points"""
if not isinstance(text, str):
text = str(text) if text is not None else ""
return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
# First split into sentences
sentences = re.split(r"(?<=[.!?])\s+", text)
for sentence in sentences:
sentence = sentence.strip()
if not sentence:
continue
# For longer sentences, split on commas and semicolons
if len(sentence) > 300: # Only split long sentences
# Split on pause points while preserving the punctuation
chunks = re.split(r"((?<=[,;])\s+)", sentence)
# Reassemble chunks with their trailing punctuation
current_chunk = ""
for i, chunk in enumerate(chunks):
if i % 2 == 0: # Text chunk
current_chunk += chunk
else: # Punctuation/whitespace chunk
current_chunk += chunk
if current_chunk.strip():
yield current_chunk.strip()
current_chunk = ""
# Yield any remaining text
if current_chunk.strip():
yield current_chunk.strip()
else:
yield sentence
@staticmethod
@lru_cache(maxsize=20) # Cache up to 20 most recently used voices
@ -69,11 +98,11 @@ class TTSService:
# Generate audio with or without stitching
if stitch_long_output:
chunks = self._split_text(text)
audio_chunks = []
chunk_count = 0
# Process all chunks
for i, chunk in enumerate(chunks):
# Process chunks as they're generated
for chunk in self._split_text(text):
try:
# Process text and generate audio
phonemes, tokens = TTSModel.process_text(chunk, voice[0])
@ -81,23 +110,21 @@ class TTSService:
if chunk_audio is not None:
audio_chunks.append(chunk_audio)
chunk_count += 1
else:
logger.error(f"No audio generated for chunk {i + 1}/{len(chunks)}")
logger.error(f"No audio generated for chunk {chunk_count + 1}")
except Exception as e:
logger.error(
f"Failed to generate audio for chunk {i + 1}/{len(chunks)}: '{chunk}'. Error: {str(e)}"
f"Failed to generate audio for chunk {chunk_count + 1}: '{chunk}'. Error: {str(e)}"
)
continue
if not audio_chunks:
raise ValueError("No audio chunks were generated successfully")
audio = (
np.concatenate(audio_chunks)
if len(audio_chunks) > 1
else audio_chunks[0]
)
# Concatenate all chunks
audio = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
else:
# Process single chunk
phonemes, tokens = TTSModel.process_text(text, voice[0])
@ -132,11 +159,9 @@ class TTSService:
raise ValueError(f"Voice not found: {voice}")
voicepack = self._load_voice(voice_path)
# Split text into sentences for natural boundaries
chunks = self._split_text(text)
# Process and stream chunks
for i, chunk in enumerate(chunks):
# Process chunks as they're generated
is_first = True
for chunk in self._split_text(text):
try:
# Process text and generate audio
phonemes, tokens = TTSModel.process_text(chunk, voice[0])
@ -148,17 +173,16 @@ class TTSService:
chunk_audio,
24000,
output_format,
is_first_chunk=(i == 0),
is_first_chunk=is_first,
normalizer=stream_normalizer
)
yield chunk_bytes
is_first = False
else:
logger.error(f"No audio generated for chunk {i + 1}/{len(chunks)}")
logger.error(f"No audio generated for chunk: '{chunk}'")
except Exception as e:
logger.error(
f"Failed to generate audio for chunk {i + 1}/{len(chunks)}: '{chunk}'. Error: {str(e)}"
)
logger.error(f"Failed to generate audio for chunk: '{chunk}'. Error: {str(e)}")
continue
except Exception as e:

View file

@ -31,6 +31,6 @@ class OpenAISpeechRequest(BaseModel):
description="The speed of the generated audio. Select a value from 0.25 to 4.0.",
)
stream: bool = Field(
default=False,
description="If true, audio will be streamed as it's generated. Each chunk will be a complete sentence.",
default=True, # Default to streaming for OpenAI compatibility
description="If true (default), audio will be streamed as it's generated. Each chunk will be a complete sentence.",
)

View file

@ -32,8 +32,10 @@ services:
start_period: 1s
kokoro-tts:
build:
context: .
image: ghcr.io/remsky/kokoro-fastapi:latest
# Uncomment below to build from source instead of using the released image
# build:
# context: .
volumes:
- ./api/src:/app/api/src
- ./Kokoro-82M:/app/Kokoro-82M

View file

@ -31,7 +31,7 @@ def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int
"model": "kokoro",
"input": text,
"voice": "af",
"response_format": "wav",
"response_format": "pcm",
"stream": True
},
stream=True,
@ -53,33 +53,19 @@ def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int
results["time_to_first_chunk"] = first_chunk_time - start_time
chunks.append(chunk)
# Extract WAV header and data separately
# First chunk has header + data, subsequent chunks are raw PCM
# Concatenate all PCM chunks
if not chunks:
raise ValueError("No audio chunks received")
first_chunk = chunks[0]
remaining_chunks = chunks[1:]
all_audio_data = b''.join(chunks)
# Find end of WAV header (44 bytes for standard WAV)
header = first_chunk[:44]
first_data = first_chunk[44:]
# Concatenate all PCM data
all_data = first_data + b''.join(remaining_chunks)
# Update WAV header with total data size
import struct
data_size = len(all_data)
# Update data size field (bytes 4-7)
header = header[:4] + struct.pack('<I', data_size + 36) + header[8:]
# Update subchunk2 size field (bytes 40-43)
header = header[:40] + struct.pack('<I', data_size) + header[44:]
# Write complete WAV file
complete_audio = header + all_data
with open(audio_path, 'wb') as f:
f.write(complete_audio)
# Write as WAV file
import wave
with wave.open(audio_path, 'wb') as wav_file:
wav_file.setnchannels(1) # Mono
wav_file.setsampwidth(2) # 2 bytes per sample (16-bit)
wav_file.setframerate(24000) # Known sample rate for Kokoro
wav_file.writeframes(all_audio_data)
# Calculate audio length using scipy
import scipy.io.wavfile as wavfile
@ -89,7 +75,7 @@ def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int
results["total_time"] = time.time() - start_time
# Print debug info
print(f"Complete audio size: {len(complete_audio)} bytes")
print(f"Complete audio size: {len(all_audio_data)} bytes")
print(f"Number of chunks received: {len(chunks)}")
print(f"Audio length: {results['audio_length']:.3f}s")
@ -114,7 +100,7 @@ def main():
text = f.read()
# Test specific token counts
token_sizes = [50, 100, 200, 500]
token_sizes = [50, 100, 200, 500, 1000, 2000, 5000, 10000]
all_results = []
for tokens in token_sizes:
@ -124,7 +110,7 @@ def main():
print(f"Text preview: {test_text[:50]}...")
# Run test 3 times for each size to get average
for i in range(3):
for i in range(5):
print(f"Run {i+1}/3...")
result = measure_first_token(test_text, output_dir, tokens, i + 1)
result["target_tokens"] = tokens
@ -194,7 +180,7 @@ def main():
plot_timeline(
df,
os.path.join(output_plots_dir, "first_token_timeline_stream.png")
os.path.join(output_plots_dir, "first_token_timeline_stream.png", suffix="(Streaming)")
)
print("\nResults and plots saved to:")

View file

@ -0,0 +1,184 @@
#!/usr/bin/env python3
import os
import time
import json
import numpy as np
import pandas as pd
from openai import OpenAI
from lib.shared_benchmark_utils import get_text_for_tokens, enc
from lib.shared_utils import save_json_results
from lib.shared_plotting import plot_correlation, plot_timeline
def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int) -> dict:
    """Benchmark one streaming TTS request made through the OpenAI client.

    Streams PCM audio for ``text`` from the local Kokoro server, records
    time-to-first-chunk and total wall time, saves the audio as a WAV file
    under ``output_dir``, and returns a dict of measurements. On failure,
    ``error`` is set and the unfilled fields stay None.
    """
    results = {
        "text_length": len(text),
        "token_count": len(enc.encode(text)),
        "total_time": None,
        "time_to_first_chunk": None,
        "error": None,
        "audio_path": None,
        "audio_length": None  # Length of output audio in seconds
    }
    try:
        t_start = time.time()

        # Client pointed at the local server; the API key is unused but required.
        client = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed-for-local")

        # Destination for the complete audio of this run.
        wav_name = f"benchmark_tokens{tokens}_run{run_number}_stream_openai.wav"
        audio_path = os.path.join(output_dir, wav_name)
        results["audio_path"] = audio_path

        first_chunk_at = None
        pcm_buffer = bytearray()
        n_chunks = 0

        # Stream raw PCM chunks and accumulate them as they arrive.
        with client.audio.speech.with_streaming_response.create(
            model="kokoro",
            voice="af",
            response_format="pcm",
            input=text,
        ) as response:
            for piece in response.iter_bytes(chunk_size=1024):
                if not piece:
                    continue
                n_chunks += 1
                if first_chunk_at is None:
                    first_chunk_at = time.time()
                    results["time_to_first_chunk"] = first_chunk_at - t_start
                pcm_buffer.extend(piece)

        # Wrap the raw PCM stream in a WAV container.
        import wave
        with wave.open(audio_path, 'wb') as wav_file:
            wav_file.setnchannels(1)      # Mono
            wav_file.setsampwidth(2)      # 2 bytes per sample (16-bit)
            wav_file.setframerate(24000)  # Known sample rate for Kokoro
            wav_file.writeframes(pcm_buffer)

        # Derive audio duration from the file we just wrote.
        import scipy.io.wavfile as wavfile
        sample_rate, audio_data = wavfile.read(audio_path)
        results["audio_length"] = len(audio_data) / sample_rate  # Length in seconds

        results["total_time"] = time.time() - t_start

        # Debug output.
        print(f"Complete audio size: {len(pcm_buffer)} bytes")
        print(f"Number of chunks received: {n_chunks}")
        print(f"Audio length: {results['audio_length']:.3f}s")
        return results
    except Exception as e:
        results["error"] = str(e)
        return results
def main():
    """Run the OpenAI-client streaming benchmark and save results and plots.

    For each target token count, benchmarks 5 streaming runs, prints per-run
    timings, aggregates per-size averages, writes a JSON summary, and renders
    latency/timeline plots (all artifacts use the _stream_openai suffix).
    """

    def _fmt(value):
        # Failed runs leave timing fields as None; the original formatted them
        # with ":.3f" unconditionally, which raised TypeError and aborted the
        # whole benchmark on the first errored run. Print "N/A" instead.
        return f"{value:.3f}s" if isinstance(value, (int, float)) else "N/A"

    # Set up paths with _stream_openai suffix
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, "output_audio_stream_openai")
    output_data_dir = os.path.join(script_dir, "output_data")

    # Create output directories
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(output_data_dir, exist_ok=True)

    # Load sample text
    with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f:
        text = f.read()

    # Test specific token counts
    token_sizes = [50, 100, 200, 500]
    all_results = []

    for tokens in token_sizes:
        print(f"\nTesting {tokens} tokens (streaming)")
        test_text = get_text_for_tokens(text, tokens)
        actual_tokens = len(enc.encode(test_text))
        print(f"Text preview: {test_text[:50]}...")

        # Run test 5 times for each size to get average
        for i in range(5):
            print(f"Run {i+1}/5...")
            result = measure_first_token(test_text, output_dir, tokens, i + 1)
            result["target_tokens"] = tokens
            result["actual_tokens"] = actual_tokens
            result["run_number"] = i + 1

            first = result.get("time_to_first_chunk")
            total = result.get("total_time")
            print(f"Time to First Audio: {_fmt(first)}")
            print(f"Time to Save Complete: {_fmt(total)}")
            print(f"Audio length: {_fmt(result.get('audio_length'))}")
            # Overhead is only meaningful when both timings were captured.
            overhead = total - first if first is not None and total is not None else None
            print(f"Streaming overhead: {_fmt(overhead)}")

            if result["error"]:
                print(f"Error: {result['error']}")

            all_results.append(result)

    # Calculate averages per token size (error runs are excluded)
    summary = {}
    for tokens in token_sizes:
        matching_results = [r for r in all_results if r["target_tokens"] == tokens and not r["error"]]
        if matching_results:
            avg_first_chunk = sum(r["time_to_first_chunk"] for r in matching_results) / len(matching_results)
            avg_total = sum(r["total_time"] for r in matching_results) / len(matching_results)
            avg_audio_length = sum(r["audio_length"] for r in matching_results) / len(matching_results)
            summary[tokens] = {
                "avg_time_to_first_chunk": round(avg_first_chunk, 3),
                "avg_total_time": round(avg_total, 3),
                "avg_audio_length": round(avg_audio_length, 3),
                "num_successful_runs": len(matching_results)
            }

    # Save results with _stream_openai suffix
    results_data = {
        "individual_runs": all_results,
        "summary": summary,
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
    }
    save_json_results(
        results_data,
        os.path.join(output_data_dir, "first_token_benchmark_stream_openai.json")
    )

    # Create plot directory if it doesn't exist
    output_plots_dir = os.path.join(script_dir, "output_plots")
    os.makedirs(output_plots_dir, exist_ok=True)

    # Create DataFrame for plotting
    df = pd.DataFrame(all_results)

    # Create plots with _stream_openai suffix
    plot_correlation(
        df, "target_tokens", "time_to_first_chunk",
        "Time to First Audio vs Input Size (OpenAI Streaming)",
        "Number of Input Tokens",
        "Time to First Audio (seconds)",
        os.path.join(output_plots_dir, "first_token_latency_stream_openai.png")
    )
    plot_correlation(
        df, "target_tokens", "total_time",
        "Total Time vs Input Size (OpenAI Streaming)",
        "Number of Input Tokens",
        "Total Time (seconds)",
        os.path.join(output_plots_dir, "total_time_latency_stream_openai.png")
    )
    plot_timeline(
        df,
        os.path.join(output_plots_dir, "first_token_timeline_stream_openai.png")
    )

    print("\nResults and plots saved to:")
    print(f"- {os.path.join(output_data_dir, 'first_token_benchmark_stream_openai.json')}")
    print(f"- {os.path.join(output_plots_dir, 'first_token_latency_stream_openai.png')}")
    print(f"- {os.path.join(output_plots_dir, 'total_time_latency_stream_openai.png')}")
    print(f"- {os.path.join(output_plots_dir, 'first_token_timeline_stream_openai.png')}")


if __name__ == "__main__":
    main()

View file

@ -138,7 +138,7 @@ def plot_system_metrics(metrics_data, output_path):
plt.savefig(output_path, dpi=300, bbox_inches="tight")
plt.close()
def plot_timeline(df, output_path):
def plot_timeline(df, output_path, suffix=""):
"""Create timeline plot showing latency for each run.
Args:
@ -255,7 +255,7 @@ def plot_timeline(df, output_path):
# Customize appearance
setup_plot(
fig, ax,
"Time-To-Audio Latency",
"Time-To-Audio Latency" + suffix,
xlabel="Time (seconds)",
ylabel="Input Size"
)

View file

@ -3,11 +3,11 @@
{
"text_length": 212,
"token_count": 50,
"total_time": 0.9603095054626465,
"time_to_first_chunk": 0.5916037559509277,
"total_time": 0.7278211116790771,
"time_to_first_chunk": 0.3613290786743164,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run1_stream.wav",
"audio_length": 15.45,
"audio_length": 16.325,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 1
@ -15,11 +15,11 @@
{
"text_length": 212,
"token_count": 50,
"total_time": 0.5130870342254639,
"time_to_first_chunk": 0.27448558807373047,
"total_time": 0.4556088447570801,
"time_to_first_chunk": 0.18642044067382812,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run2_stream.wav",
"audio_length": 15.45,
"audio_length": 16.325,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 2
@ -27,23 +27,47 @@
{
"text_length": 212,
"token_count": 50,
"total_time": 0.4667215347290039,
"time_to_first_chunk": 0.22882533073425293,
"total_time": 0.5538768768310547,
"time_to_first_chunk": 0.2720797061920166,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run3_stream.wav",
"audio_length": 15.45,
"audio_length": 16.325,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 3
},
{
"text_length": 212,
"token_count": 50,
"total_time": 0.4395604133605957,
"time_to_first_chunk": 0.15613913536071777,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run4_stream.wav",
"audio_length": 16.325,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 4
},
{
"text_length": 212,
"token_count": 50,
"total_time": 0.45748305320739746,
"time_to_first_chunk": 0.18805718421936035,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run5_stream.wav",
"audio_length": 16.325,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 5
},
{
"text_length": 448,
"token_count": 100,
"total_time": 0.9051008224487305,
"time_to_first_chunk": 0.2526383399963379,
"total_time": 0.7347762584686279,
"time_to_first_chunk": 0.16963744163513184,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run1_stream.wav",
"audio_length": 30.25,
"audio_length": 31.1,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 1
@ -51,11 +75,11 @@
{
"text_length": 448,
"token_count": 100,
"total_time": 0.8579132556915283,
"time_to_first_chunk": 0.25691914558410645,
"total_time": 0.8288509845733643,
"time_to_first_chunk": 0.20123004913330078,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run2_stream.wav",
"audio_length": 30.25,
"audio_length": 31.1,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 2
@ -63,23 +87,47 @@
{
"text_length": 448,
"token_count": 100,
"total_time": 0.9683890342712402,
"time_to_first_chunk": 0.26229000091552734,
"total_time": 0.7503848075866699,
"time_to_first_chunk": 0.21662068367004395,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run3_stream.wav",
"audio_length": 30.25,
"audio_length": 31.1,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 3
},
{
"text_length": 448,
"token_count": 100,
"total_time": 0.694899320602417,
"time_to_first_chunk": 0.1966841220855713,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run4_stream.wav",
"audio_length": 31.1,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 4
},
{
"text_length": 448,
"token_count": 100,
"total_time": 0.68701171875,
"time_to_first_chunk": 0.19341063499450684,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run5_stream.wav",
"audio_length": 31.1,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 5
},
{
"text_length": 906,
"token_count": 200,
"total_time": 1.8075971603393555,
"time_to_first_chunk": 0.22536945343017578,
"total_time": 1.6845426559448242,
"time_to_first_chunk": 0.21096158027648926,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run1_stream.wav",
"audio_length": 60.75,
"audio_length": 62.625,
"target_tokens": 200,
"actual_tokens": 200,
"run_number": 1
@ -87,11 +135,11 @@
{
"text_length": 906,
"token_count": 200,
"total_time": 1.493518590927124,
"time_to_first_chunk": 0.21502947807312012,
"total_time": 1.3545098304748535,
"time_to_first_chunk": 0.18648386001586914,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run2_stream.wav",
"audio_length": 60.75,
"audio_length": 62.625,
"target_tokens": 200,
"actual_tokens": 200,
"run_number": 2
@ -99,23 +147,47 @@
{
"text_length": 906,
"token_count": 200,
"total_time": 1.4910809993743896,
"time_to_first_chunk": 0.21600556373596191,
"total_time": 1.426060676574707,
"time_to_first_chunk": 0.20081472396850586,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run3_stream.wav",
"audio_length": 60.75,
"audio_length": 62.625,
"target_tokens": 200,
"actual_tokens": 200,
"run_number": 3
},
{
"text_length": 906,
"token_count": 200,
"total_time": 1.4084081649780273,
"time_to_first_chunk": 0.18551135063171387,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run4_stream.wav",
"audio_length": 62.625,
"target_tokens": 200,
"actual_tokens": 200,
"run_number": 4
},
{
"text_length": 906,
"token_count": 200,
"total_time": 1.4703152179718018,
"time_to_first_chunk": 0.17750859260559082,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run5_stream.wav",
"audio_length": 62.625,
"target_tokens": 200,
"actual_tokens": 200,
"run_number": 5
},
{
"text_length": 2232,
"token_count": 500,
"total_time": 4.223623275756836,
"time_to_first_chunk": 0.20010590553283691,
"total_time": 4.289574384689331,
"time_to_first_chunk": 0.1997976303100586,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run1_stream.wav",
"audio_length": 147.775,
"audio_length": 157.875,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 1
@ -123,11 +195,11 @@
{
"text_length": 2232,
"token_count": 500,
"total_time": 3.8811349868774414,
"time_to_first_chunk": 0.24638962745666504,
"total_time": 3.7089381217956543,
"time_to_first_chunk": 0.25969815254211426,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run2_stream.wav",
"audio_length": 147.775,
"audio_length": 157.875,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 2
@ -135,41 +207,65 @@
{
"text_length": 2232,
"token_count": 500,
"total_time": 4.045536994934082,
"time_to_first_chunk": 0.2252039909362793,
"total_time": 4.138366222381592,
"time_to_first_chunk": 0.1831505298614502,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run3_stream.wav",
"audio_length": 147.775,
"audio_length": 157.875,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 3
},
{
"text_length": 2232,
"token_count": 500,
"total_time": 3.980635643005371,
"time_to_first_chunk": 0.20493030548095703,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run4_stream.wav",
"audio_length": 157.875,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 4
},
{
"text_length": 2232,
"token_count": 500,
"total_time": 4.1370298862457275,
"time_to_first_chunk": 0.19150757789611816,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run5_stream.wav",
"audio_length": 157.875,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 5
}
],
"summary": {
"50": {
"avg_time_to_first_chunk": 0.365,
"avg_total_time": 0.647,
"avg_audio_length": 15.45,
"num_successful_runs": 3
"avg_time_to_first_chunk": 0.233,
"avg_total_time": 0.527,
"avg_audio_length": 16.325,
"num_successful_runs": 5
},
"100": {
"avg_time_to_first_chunk": 0.257,
"avg_total_time": 0.91,
"avg_audio_length": 30.25,
"num_successful_runs": 3
"avg_time_to_first_chunk": 0.196,
"avg_total_time": 0.739,
"avg_audio_length": 31.1,
"num_successful_runs": 5
},
"200": {
"avg_time_to_first_chunk": 0.219,
"avg_total_time": 1.597,
"avg_audio_length": 60.75,
"num_successful_runs": 3
"avg_time_to_first_chunk": 0.192,
"avg_total_time": 1.469,
"avg_audio_length": 62.625,
"num_successful_runs": 5
},
"500": {
"avg_time_to_first_chunk": 0.224,
"avg_total_time": 4.05,
"avg_audio_length": 147.775,
"num_successful_runs": 3
"avg_time_to_first_chunk": 0.208,
"avg_total_time": 4.051,
"avg_audio_length": 157.875,
"num_successful_runs": 5
}
},
"timestamp": "2025-01-04 14:59:28"
"timestamp": "2025-01-04 22:16:30"
}

View file

@ -0,0 +1,271 @@
{
"individual_runs": [
{
"text_length": 212,
"token_count": 50,
"total_time": 1.149611473083496,
"time_to_first_chunk": 0.8767304420471191,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run1_stream_openai.wav",
"audio_length": 16.325,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 1
},
{
"text_length": 212,
"token_count": 50,
"total_time": 0.9325947761535645,
"time_to_first_chunk": 0.5965914726257324,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run2_stream_openai.wav",
"audio_length": 16.325,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 2
},
{
"text_length": 212,
"token_count": 50,
"total_time": 0.9205234050750732,
"time_to_first_chunk": 0.5961906909942627,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run3_stream_openai.wav",
"audio_length": 16.325,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 3
},
{
"text_length": 212,
"token_count": 50,
"total_time": 1.1321916580200195,
"time_to_first_chunk": 0.6946916580200195,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run4_stream_openai.wav",
"audio_length": 16.325,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 4
},
{
"text_length": 212,
"token_count": 50,
"total_time": 1.1146185398101807,
"time_to_first_chunk": 0.6918885707855225,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run5_stream_openai.wav",
"audio_length": 16.325,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 5
},
{
"text_length": 448,
"token_count": 100,
"total_time": 1.3645410537719727,
"time_to_first_chunk": 0.6802399158477783,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run1_stream_openai.wav",
"audio_length": 31.1,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 1
},
{
"text_length": 448,
"token_count": 100,
"total_time": 1.4154777526855469,
"time_to_first_chunk": 0.7297353744506836,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run2_stream_openai.wav",
"audio_length": 31.1,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 2
},
{
"text_length": 448,
"token_count": 100,
"total_time": 1.3589520454406738,
"time_to_first_chunk": 0.698603630065918,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run3_stream_openai.wav",
"audio_length": 31.1,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 3
},
{
"text_length": 448,
"token_count": 100,
"total_time": 1.2276430130004883,
"time_to_first_chunk": 0.6705801486968994,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run4_stream_openai.wav",
"audio_length": 31.1,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 4
},
{
"text_length": 448,
"token_count": 100,
"total_time": 1.0949454307556152,
"time_to_first_chunk": 0.5698442459106445,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run5_stream_openai.wav",
"audio_length": 31.1,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 5
},
{
"text_length": 906,
"token_count": 200,
"total_time": 1.8211240768432617,
"time_to_first_chunk": 0.6070489883422852,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run1_stream_openai.wav",
"audio_length": 62.625,
"target_tokens": 200,
"actual_tokens": 200,
"run_number": 1
},
{
"text_length": 906,
"token_count": 200,
"total_time": 1.8376774787902832,
"time_to_first_chunk": 0.6538689136505127,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run2_stream_openai.wav",
"audio_length": 62.625,
"target_tokens": 200,
"actual_tokens": 200,
"run_number": 2
},
{
"text_length": 906,
"token_count": 200,
"total_time": 1.6953792572021484,
"time_to_first_chunk": 0.5554308891296387,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run3_stream_openai.wav",
"audio_length": 62.625,
"target_tokens": 200,
"actual_tokens": 200,
"run_number": 3
},
{
"text_length": 906,
"token_count": 200,
"total_time": 1.887030839920044,
"time_to_first_chunk": 0.5866930484771729,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run4_stream_openai.wav",
"audio_length": 62.625,
"target_tokens": 200,
"actual_tokens": 200,
"run_number": 4
},
{
"text_length": 906,
"token_count": 200,
"total_time": 1.7908406257629395,
"time_to_first_chunk": 0.5897490978240967,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run5_stream_openai.wav",
"audio_length": 62.625,
"target_tokens": 200,
"actual_tokens": 200,
"run_number": 5
},
{
"text_length": 2232,
"token_count": 500,
"total_time": 4.228837013244629,
"time_to_first_chunk": 0.5315976142883301,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run1_stream_openai.wav",
"audio_length": 157.875,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 1
},
{
"text_length": 2232,
"token_count": 500,
"total_time": 4.489210367202759,
"time_to_first_chunk": 0.5261838436126709,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run2_stream_openai.wav",
"audio_length": 157.875,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 2
},
{
"text_length": 2232,
"token_count": 500,
"total_time": 4.5290446281433105,
"time_to_first_chunk": 0.6186764240264893,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run3_stream_openai.wav",
"audio_length": 157.875,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 3
},
{
"text_length": 2232,
"token_count": 500,
"total_time": 4.209261178970337,
"time_to_first_chunk": 0.5990591049194336,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run4_stream_openai.wav",
"audio_length": 157.875,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 4
},
{
"text_length": 2232,
"token_count": 500,
"total_time": 4.218762636184692,
"time_to_first_chunk": 0.5466251373291016,
"error": null,
"audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run5_stream_openai.wav",
"audio_length": 157.875,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 5
}
],
"summary": {
"50": {
"avg_time_to_first_chunk": 0.691,
"avg_total_time": 1.05,
"avg_audio_length": 16.325,
"num_successful_runs": 5
},
"100": {
"avg_time_to_first_chunk": 0.67,
"avg_total_time": 1.292,
"avg_audio_length": 31.1,
"num_successful_runs": 5
},
"200": {
"avg_time_to_first_chunk": 0.599,
"avg_total_time": 1.806,
"avg_audio_length": 62.625,
"num_successful_runs": 5
},
"500": {
"avg_time_to_first_chunk": 0.564,
"avg_total_time": 4.335,
"avg_audio_length": 157.875,
"num_successful_runs": 5
}
},
"timestamp": "2025-01-04 22:18:03"
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 214 KiB

After

Width:  |  Height:  |  Size: 210 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 268 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 189 KiB

After

Width:  |  Height:  |  Size: 193 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 196 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 243 KiB

After

Width:  |  Height:  |  Size: 252 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 258 KiB

View file

@ -0,0 +1,268 @@
import random
import re
import string
import time
from typing import Dict, List, Tuple
def create_test_cases() -> List[str]:
    """Create a variety of test cases with different characteristics.

    Returns:
        A list of test strings: a handful of hand-written cases that hit
        specific normalization patterns, plus randomly generated texts of
        lengths 100, 1000 and 10000 characters (three variations each) with
        known patterns spliced in at fixed offsets.
    """

    # Helper to create random text with specific patterns
    def random_text(length: int) -> str:
        return ''.join(random.choice(string.ascii_letters + string.digits + " .,!?") for _ in range(length))

    # Base test cases that hit specific patterns
    base_cases = [
        "Dr. Smith and Mr. Jones discussed the $1,234.56 million investment.",
        "Yeah, they met at 10:30 and reviewed A.B.C. documentation with Mrs. Brown etc.",
        'The temperature was 72.5 degrees (quite normal) for "this time" of year.',
        "X's and Y's properties cost £50 million in the 1990s",
        "こんにちは。今日は!",
    ]

    # Add base cases
    test_cases = list(base_cases)

    # Patterns we want present in every generated case, with the (start, end)
    # windows they should occupy.
    insertions = [(10, 20, "Dr. Smith"), (30, 40, "$1,234.56"), (50, 60, "A.B.C. xyz")]

    # Add variations with random content
    for length in [100, 1000, 10000]:
        # Create 3 variations of each length
        for _ in range(3):
            text = random_text(length)
            # Splice the patterns in by index.  The previous implementation
            # used str.replace() on a random slice, which replaces *every*
            # occurrence of that slice and operates on offsets shifted by
            # earlier substitutions, so insertions could be duplicated or
            # silently land in the wrong place.  Splicing from the highest
            # offset down keeps the earlier offsets stable.
            for start, end, pattern in sorted(insertions, reverse=True):
                text = text[:start] + pattern + text[end:]
            test_cases.append(text)

    return test_cases
class TextNormalizerInline:
    """Text normalizer using inline patterns.

    Benchmark baseline: every regex is handed to ``re.sub`` as a raw string,
    so compiled-pattern reuse relies entirely on the ``re`` module's internal
    cache.  Substitution order matters — later rules operate on the output of
    earlier ones.
    """

    def normalize(self, text: str) -> str:
        """Return *text* rewritten for TTS (quotes, titles, numbers, money)."""
        # Replace quotes and brackets: curly single quotes -> ASCII apostrophe,
        # guillemets -> curly double quotes -> ASCII double quotes, then
        # parentheses are swapped to guillemets for downstream handling.
        text = text.replace(chr(8216), "'").replace(chr(8217), "'")
        text = text.replace("«", chr(8220)).replace("»", chr(8221))
        text = text.replace(chr(8220), '"').replace(chr(8221), '"')
        text = text.replace("(", "«").replace(")", "»")

        # Handle CJK punctuation: map each fullwidth mark to its ASCII
        # equivalent followed by a space.
        for a, b in zip("、。!,:;?", ",.!,:;?"):
            text = text.replace(a, b + " ")

        # Whitespace cleanup: odd whitespace (anything except space/newline)
        # -> space, collapse runs of spaces, drop spaces stranded between
        # newlines.
        text = re.sub(r"[^\S \n]", " ", text)
        text = re.sub(r" +", " ", text)
        text = re.sub(r"(?<=\n) +(?=\n)", "", text)

        # Expand honorifics before a capitalized name, and bare "etc."
        text = re.sub(r"\bD[Rr]\.(?= [A-Z])", "Doctor", text)
        text = re.sub(r"\b(?:Mr\.|MR\.(?= [A-Z]))", "Mister", text)
        text = re.sub(r"\b(?:Ms\.|MS\.(?= [A-Z]))", "Miss", text)
        text = re.sub(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))", "Mrs", text)
        text = re.sub(r"\betc\.(?! [A-Z])", "etc", text)

        # Phonetic tweak: "yeah"/"yea" -> "ye'a" (case-preserving first letter)
        text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text)

        # Numbers: decimals / 4-digit years / clock times go through split_num,
        # then thousands separators are stripped, money amounts are expanded,
        # and remaining decimals are spelled with "point".
        text = re.sub(r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)", split_num, text)
        text = re.sub(r"(?<=\d),(?=\d)", "", text)
        text = re.sub(r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b", handle_money, text)
        text = re.sub(r"\d*\.\d+", handle_decimal, text)

        # Numeric ranges ("3-5" -> "3 to 5") and letter spacing after digits.
        text = re.sub(r"(?<=\d)-(?=\d)", " to ", text)
        text = re.sub(r"(?<=\d)S", " S", text)

        # Possessives and initials: normalize 's after consonants, undo it
        # after X', and join runs of initials ("A.B.C." -> "A-B-C").
        text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
        text = re.sub(r"(?<=X')S\b", "s", text)
        text = re.sub(r"(?:[A-Za-z]\.){2,} [a-z]", lambda m: m.group().replace(".", "-"), text)
        text = re.sub(r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)

        return text.strip()
class TextNormalizerCompiled:
    """Text normalizer using all compiled patterns.

    Every regex is compiled once in ``__init__``; ``normalize`` then applies
    them as an ordered pipeline.  Order matters — later substitutions operate
    on the output of earlier ones.
    """

    def __init__(self) -> None:
        self.patterns = {
            'whitespace': re.compile(r"[^\S \n]"),
            'multi_space': re.compile(r" +"),
            'newline_space': re.compile(r"(?<=\n) +(?=\n)"),
            'doctor': re.compile(r"\bD[Rr]\.(?= [A-Z])"),
            'mister': re.compile(r"\b(?:Mr\.|MR\.(?= [A-Z]))"),
            'miss': re.compile(r"\b(?:Ms\.|MS\.(?= [A-Z]))"),
            'mrs': re.compile(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))"),
            'etc': re.compile(r"\betc\.(?! [A-Z])"),
            'yeah': re.compile(r"(?i)\b(y)eah?\b"),
            'numbers': re.compile(r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)"),
            'comma_in_number': re.compile(r"(?<=\d),(?=\d)"),
            'money': re.compile(r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b"),
            'decimal': re.compile(r"\d*\.\d+"),
            'range': re.compile(r"(?<=\d)-(?=\d)"),
            's_after_number': re.compile(r"(?<=\d)S"),
            'possessive_s': re.compile(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b"),
            'x_possessive': re.compile(r"(?<=X')S\b"),
            'initials': re.compile(r"(?:[A-Za-z]\.){2,} [a-z]"),
            'single_initial': re.compile(r"(?i)(?<=[A-Z])\.(?=[A-Z])")
        }
        # Ordered (pattern-name, replacement) pipeline applied by normalize().
        # The sequence mirrors the inline implementation exactly.
        self._pipeline = [
            ('whitespace', " "),
            ('multi_space', " "),
            ('newline_space', ""),
            ('doctor', "Doctor"),
            ('mister', "Mister"),
            ('miss', "Miss"),
            ('mrs', "Mrs"),
            ('etc', "etc"),
            ('yeah', r"\1e'a"),
            ('numbers', split_num),
            ('comma_in_number', ""),
            ('money', handle_money),
            ('decimal', handle_decimal),
            ('range', " to "),
            ('s_after_number', " S"),
            ('possessive_s', "'S"),
            ('x_possessive', "s"),
            ('initials', lambda m: m.group().replace(".", "-")),
            ('single_initial', "-"),
        ]

    def normalize(self, text: str) -> str:
        """Return *text* rewritten for TTS using the pre-compiled pipeline."""
        # Replace quotes and brackets
        text = text.replace(chr(8216), "'").replace(chr(8217), "'")
        text = text.replace("«", chr(8220)).replace("»", chr(8221))
        text = text.replace(chr(8220), '"').replace(chr(8221), '"')
        text = text.replace("(", "«").replace(")", "»")

        # Handle CJK punctuation
        for a, b in zip("、。!,:;?", ",.!,:;?"):
            text = text.replace(a, b + " ")

        # Apply every compiled pattern in its fixed order.
        for name, replacement in self._pipeline:
            text = self.patterns[name].sub(replacement, text)

        return text.strip()
class TextNormalizerHybrid:
    """Text normalizer using hybrid approach - compile only complex/frequent patterns.

    Middle ground for the benchmark: the four most complex patterns are
    pre-compiled; the simpler ones stay inline and rely on the ``re`` module's
    internal cache.  Substitution order matters — later rules operate on the
    output of earlier ones.
    """

    def __init__(self) -> None:
        # Only compile patterns that are complex or frequently used
        self.patterns = {
            'whitespace': re.compile(r"[^\S \n]"),
            'numbers': re.compile(r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)"),
            'money': re.compile(r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b"),
            'initials': re.compile(r"(?:[A-Za-z]\.){2,} [a-z]")
        }

    def normalize(self, text: str) -> str:
        """Return *text* rewritten for TTS (quotes, titles, numbers, money)."""
        # Replace quotes and brackets: curly single quotes -> ASCII apostrophe,
        # then parentheses are swapped to guillemets for downstream handling.
        text = text.replace(chr(8216), "'").replace(chr(8217), "'")
        text = text.replace("«", chr(8220)).replace("»", chr(8221))
        text = text.replace(chr(8220), '"').replace(chr(8221), '"')
        text = text.replace("(", "«").replace(")", "»")

        # Handle CJK punctuation: fullwidth mark -> ASCII + trailing space
        for a, b in zip("、。!,:;?", ",.!,:;?"):
            text = text.replace(a, b + " ")

        # Use compiled patterns for complex operations
        text = self.patterns['whitespace'].sub(" ", text)
        text = self.patterns['numbers'].sub(split_num, text)
        text = self.patterns['money'].sub(handle_money, text)
        text = self.patterns['initials'].sub(lambda m: m.group().replace(".", "-"), text)

        # Use inline patterns for simpler operations: whitespace collapse,
        # honorific expansion, phonetic tweaks, number formatting, possessives.
        text = re.sub(r" +", " ", text)
        text = re.sub(r"(?<=\n) +(?=\n)", "", text)
        text = re.sub(r"\bD[Rr]\.(?= [A-Z])", "Doctor", text)
        text = re.sub(r"\b(?:Mr\.|MR\.(?= [A-Z]))", "Mister", text)
        text = re.sub(r"\b(?:Ms\.|MS\.(?= [A-Z]))", "Miss", text)
        text = re.sub(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))", "Mrs", text)
        text = re.sub(r"\betc\.(?! [A-Z])", "etc", text)
        text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text)
        text = re.sub(r"(?<=\d),(?=\d)", "", text)
        text = re.sub(r"\d*\.\d+", handle_decimal, text)
        text = re.sub(r"(?<=\d)-(?=\d)", " to ", text)
        text = re.sub(r"(?<=\d)S", " S", text)
        text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
        text = re.sub(r"(?<=X')S\b", "s", text)
        text = re.sub(r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)

        return text.strip()
def split_num(match: re.Match) -> str:
    """Format a matched number/time token for TTS playback.

    Clock times like "10:30" become "10 30"; plural years like "1990s"
    become "1990 s"; any other token is returned unchanged.
    """
    token = match.group(0)
    if ":" in token:
        hours, minutes = token.split(":")
        return f"{hours} {minutes}"
    return f"{token[:-1]} s" if token.endswith("s") else token
def handle_money(match: re.Match) -> str:
    """Replace currency symbols in a matched money string with words for TTS."""
    result = match.group(0)
    for symbol, word in (("$", " dollars "), ("£", " pounds ")):
        result = result.replace(symbol, word)
    return result
def handle_decimal(match: re.Match) -> str:
    """Spell out the decimal point of a matched number for TTS."""
    return " point ".join(match.group(0).split("."))
def benchmark_normalizers(test_cases: List[str], iterations: int = 100) -> Dict[str, float]:
    """Benchmark all three implementations.

    Args:
        test_cases: Texts to normalize.
        iterations: Number of full passes over ``test_cases`` per implementation.

    Returns:
        Mapping of implementation name ("inline", "compiled", "hybrid") to
        total elapsed wall-clock seconds.  (The original annotation claimed
        ``Tuple[float, float, float]`` but a dict is what is actually built
        and what ``main`` iterates with ``.items()``.)
    """
    normalizers = {
        'inline': TextNormalizerInline(),
        'compiled': TextNormalizerCompiled(),
        'hybrid': TextNormalizerHybrid()
    }

    results: Dict[str, float] = {}

    # Test each normalizer
    for name, normalizer in normalizers.items():
        start = time.perf_counter()

        # Run normalizations
        for _ in range(iterations):
            for test in test_cases:
                normalizer.normalize(test)

        results[name] = time.perf_counter() - start

    return results
def verify_outputs(test_cases: List[str]) -> bool:
    """Check that all three implementations normalize every case identically."""
    implementations = [
        TextNormalizerInline(),
        TextNormalizerCompiled(),
        TextNormalizerHybrid(),
    ]
    for case in test_cases:
        # If the set of distinct outputs has more than one member, at least
        # two implementations disagree.
        outputs = {impl.normalize(case) for impl in implementations}
        if len(outputs) > 1:
            return False
    return True
def main():
    """Generate test data, verify implementation parity, then benchmark."""
    # Create test cases
    print("Generating test cases...")
    test_cases = create_test_cases()
    total_chars = sum(len(t) for t in test_cases)
    print(f"Created {len(test_cases)} test cases, total size: {total_chars:,} characters")

    # Verify output consistency before timing anything — benchmarking
    # implementations that disagree would be meaningless.
    print("\nVerifying output consistency...")
    if verify_outputs(test_cases):
        print("✓ All implementations produce identical output")
    else:
        print("✗ Warning: Implementations produce different outputs!")
        return

    # Run benchmarks
    print("\nRunning benchmarks...")
    iterations = 100
    results = benchmark_normalizers(test_cases, iterations)

    # Print results
    print(f"\nResults for {iterations} iterations: ")
    for name, time_taken in results.items():
        print(f"{name.capitalize()}: {time_taken:.3f}s")


if __name__ == "__main__":
    # Guard the entry point: the original called main() unconditionally,
    # so merely importing this module ran the entire benchmark.
    main()

View file

@ -36,10 +36,7 @@ def stream_to_speakers() -> None:
model="kokoro",
voice="af",
response_format="pcm", # similar to WAV, but without a header chunk at the start.
input="""I see skies of blue and clouds of white
The bright blessed days, the dark sacred nights
And I think to myself
What a wonderful world""",
input="""My dear sir, that is just where you are wrong. That is just where the whole world has gone wrong. We are always getting away from the present moment. Our mental existences, which are immaterial and have no dimensions, are passing along the Time-Dimension with a uniform velocity from the cradle to the grave. Just as we should travel down if we began our existence fifty miles above the earths surface""",
) as response:
print(f"Time to first byte: {int((time.time() - start_time) * 1000)}ms")
for chunk in response.iter_bytes(chunk_size=1024):

BIN
examples/output.wav Normal file

Binary file not shown.

Binary file not shown.