diff --git a/.coverage b/.coverage
index 3939b52..f449db0 100644
Binary files a/.coverage and b/.coverage differ
diff --git a/api/src/main.py b/api/src/main.py
index 50f31eb..9dec012 100644
--- a/api/src/main.py
+++ b/api/src/main.py
@@ -23,16 +23,25 @@ async def lifespan(app: FastAPI):
     # Initialize the main model with warm-up
     voicepack_count = TTSModel.setup()
-    logger.info("""
-    ███████╗█████╗█████████████████╗ ██╗██████╗██╗ ██╗██████╗
-    ██╔════██╔══████╔════╚══██╔══██║ ██╔██╔═══████║ ██╔██╔═══██╗
-    █████╗ ██████████████╗ ██║ █████╔╝██║ ███████╔╝██║ ██║
-    ██╔══╝ ██╔══██╚════██║ ██║ ██╔═██╗██║ ████╔═██╗██║ ██║
-    ██║ ██║ █████████║ ██║ ██║ ██╚██████╔██║ ██╚██████╔╝
-    ╚═╝ ╚═╝ ╚═╚══════╝ ╚═╝ ╚═╝ ╚═╝╚═════╝╚═╝ ╚═╝╚═════╝ """)
-    logger.info(f"Model loaded and warmed up on {TTSModel.get_device()}")
-    logger.info(f"{voicepack_count} voice packs loaded successfully")
-    logger.info("#" * 80)
+    # boundary = "█████╗"*9
+    boundary = "░" * 54
+    startup_msg =f"""
+{boundary}
+
+    ╔═╗┌─┐┌─┐┌┬┐
+    ╠╣ ├─┤└─┐ │
+    ╚ ┴ ┴└─┘ ┴
+    ╦╔═┌─┐┬┌─┌─┐
+    ╠╩╗│ │├┴┐│ │
+    ╩ ╩└─┘┴ ┴└─┘
+
+{boundary}
+    """
+    startup_msg += f"\nModel loaded and warmed up on {TTSModel.get_device()}"
+    startup_msg += f"\n{voicepack_count} voice packs loaded successfully\n"
+    startup_msg += f"\n{boundary}\n"
+    logger.info(startup_msg)
+
     yield
diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py
index c8fa610..97279f6 100644
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@@ -2,8 +2,8 @@
 from typing import List
 from loguru import logger
 from fastapi import Depends, Response, APIRouter, HTTPException
+from fastapi import Header
 from fastapi.responses import StreamingResponse
-
 from ..services.tts_service import TTSService
 from ..services.audio import AudioService
 from ..structures.schemas import OpenAISpeechRequest
@@ -30,9 +30,13 @@ async def stream_audio_chunks(tts_service: TTSService, request: OpenAISpeechRequ
     ):
         yield chunk
+
+
+
 @router.post("/audio/speech")
 async def create_speech(
-    request: OpenAISpeechRequest, tts_service: TTSService = Depends(get_tts_service)
+    request: OpenAISpeechRequest,
+    tts_service: TTSService = Depends(get_tts_service),
+    x_raw_response: str = Header(None, alias="x-raw-response"),
 ):
     """OpenAI-compatible endpoint for text-to-speech"""
     try:
@@ -53,7 +57,10 @@ async def create_speech(
             "pcm": "audio/pcm",
         }.get(request.response_format, f"audio/{request.response_format}")
 
-        if request.stream:
+        # Check if streaming is requested via header
+        is_streaming = x_raw_response == "stream"
+
+        if is_streaming:
             # Stream audio chunks as they're generated
             return StreamingResponse(
                 stream_audio_chunks(tts_service, request),
diff --git a/api/src/services/audio.py b/api/src/services/audio.py
index 4883eed..f909519 100644
--- a/api/src/services/audio.py
+++ b/api/src/services/audio.py
@@ -14,17 +14,15 @@ class AudioNormalizer:
     def normalize(self, audio_data: np.ndarray) -> np.ndarray:
         """Normalize audio data to int16 range"""
-        # Convert to float64 for accurate scaling
-        audio_float = audio_data.astype(np.float64)
+        # Convert to float32 if not already
+        audio_float = audio_data.astype(np.float32)
 
-        # Scale to int16 range while preserving relative amplitudes
-        max_val = np.abs(audio_float).max()
-        if max_val > 0:
-            scaling = self.int16_max / max_val
-            audio_float *= scaling
-
-        # Clip to int16 range and convert
-        return np.clip(audio_float, -self.int16_max, self.int16_max).astype(np.int16)
+        # Normalize to [-1, 1] range first
+        if np.max(np.abs(audio_float)) > 0:
+            audio_float = audio_float / np.max(np.abs(audio_float))
+
+        # Scale to int16 range
+        return (audio_float * self.int16_max).astype(np.int16)
 
 class AudioService:
     """Service for audio format conversions"""
@@ -51,11 +49,10 @@ class AudioService:
         buffer = BytesIO()
 
         try:
-            # Normalize audio if normalizer provided, otherwise just convert to int16
-            if normalizer is not None:
-                normalized_audio = normalizer.normalize(audio_data)
-            else:
-                normalized_audio = audio_data.astype(np.int16)
+            # Always normalize audio to ensure proper amplitude scaling
+            if normalizer is None:
+                normalizer = AudioNormalizer()
+            normalized_audio = normalizer.normalize(audio_data)
 
             if output_format == "pcm":
                 logger.info("Writing PCM data...")
@@ -68,8 +65,7 @@ class AudioService:
             elif output_format in ["mp3", "aac"]:
                 logger.info(f"Converting to {output_format.upper()} format...")
                 # Use lower bitrate for streaming
-                sf.write(buffer, normalized_audio, sample_rate, format=output_format.upper(),
-                         subtype='COMPRESSED')
+                sf.write(buffer, normalized_audio, sample_rate, format=output_format.upper())
             elif output_format == "opus":
                 logger.info("Converting to Opus format...")
                 # Use lower bitrate and smaller frame size for streaming
diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py
index 66c053b..ca73142 100644
--- a/api/src/services/tts_service.py
+++ b/api/src/services/tts_service.py
@@ -132,24 +132,8 @@ class TTSService:
                 raise ValueError(f"Voice not found: {voice}")
             voicepack = self._load_voice(voice_path)
 
-            # Split text into smaller chunks for faster streaming
-            # Use shorter chunks for real-time delivery
-            chunks = []
-            sentences = self._split_text(text)
-            current_chunk = []
-            current_length = 0
-            target_length = 100  # Target ~100 characters per chunk for faster processing
-
-            for sentence in sentences:
-                current_chunk.append(sentence)
-                current_length += len(sentence)
-                if current_length >= target_length:
-                    chunks.append(" ".join(current_chunk))
-                    current_chunk = []
-                    current_length = 0
-
-            if current_chunk:
-                chunks.append(" ".join(current_chunk))
+            # Split text into sentences for natural boundaries
+            chunks = self._split_text(text)
 
             # Process and stream chunks
             for i, chunk in enumerate(chunks):
diff --git a/docker-compose.yml b/docker-compose.yml
index 7308745..8a962c2 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,20 +1,26 @@
 services:
   model-fetcher:
     image: datamachines/git-lfs:latest
+    environment:
+      - SKIP_MODEL_FETCH=${SKIP_MODEL_FETCH:-true}
     volumes:
       - ./Kokoro-82M:/app/Kokoro-82M
     working_dir: /app/Kokoro-82M
     command: >
       sh -c "
-        rm -f .git/index.lock;
-        if [ -z \"$(ls -A .)\" ]; then
-          git clone https://huggingface.co/hexgrad/Kokoro-82M .
-          touch .cloned;
+        if [ \"$$SKIP_MODEL_FETCH\" = \"true\" ]; then
+          echo 'Skipping model fetch...' && touch .cloned;
         else
-          rm -f .git/index.lock && \
-          git checkout main && \
-          git pull origin main && \
-          touch .cloned;
+          rm -f .git/index.lock;
+          if [ -z \"$(ls -A .)\" ]; then
+            git clone https://huggingface.co/hexgrad/Kokoro-82M .
+            touch .cloned;
+          else
+            rm -f .git/index.lock && \
+            git checkout main && \
+            git pull origin main && \
+            touch .cloned;
+          fi;
         fi;
         tail -f /dev/null
       "
diff --git a/examples/openai_streaming_audio.py b/examples/openai_streaming_audio.py
new file mode 100644
index 0000000..e0dddc6
--- /dev/null
+++ b/examples/openai_streaming_audio.py
@@ -0,0 +1,52 @@
+
+#!/usr/bin/env rye run python
+
+import time
+from pathlib import Path
+
+from openai import OpenAI
+
+# gets OPENAI_API_KEY from your environment variables
+openai = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed-for-local")
+
+speech_file_path = Path(__file__).parent / "speech.mp3"
+
+
+def main() -> None:
+    stream_to_speakers()
+
+    # Create text-to-speech audio file
+    with openai.audio.speech.with_streaming_response.create(
+        model="kokoro",
+        voice="af",
+        input="the quick brown fox jumped over the lazy dogs",
+    ) as response:
+        response.stream_to_file(speech_file_path)
+
+
+def stream_to_speakers() -> None:
+    import pyaudio
+
+    player_stream = pyaudio.PyAudio().open(format=pyaudio.paInt16, channels=1, rate=24000, output=True)
+
+    start_time = time.time()
+
+    with openai.audio.speech.with_streaming_response.create(
+        model="kokoro",
+        voice="af",
+        response_format="pcm",  # similar to WAV, but without a header chunk at the start.
+        input="""I see skies of blue and clouds of white
+            The bright blessed days, the dark sacred nights
+            And I think to myself
+            What a wonderful world""",
+    ) as response:
+        print(f"Time to first byte: {int((time.time() - start_time) * 1000)}ms")
+        for chunk in response.iter_bytes(chunk_size=1024):
+            player_stream.write(chunk)
+
+    print(f"Done in {int((time.time() - start_time) * 1000)}ms.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/output.wav b/examples/output.wav
deleted file mode 100644
index 4f23759..0000000
Binary files a/examples/output.wav and /dev/null differ
diff --git a/examples/speech.mp3 b/examples/speech.mp3
new file mode 100644
index 0000000..4cc0723
Binary files /dev/null and b/examples/speech.mp3 differ
diff --git a/examples/stream_tts_playback.py b/examples/stream_tts_playback.py
index 5670f50..70999a8 100644
--- a/examples/stream_tts_playback.py
+++ b/examples/stream_tts_playback.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 import requests
-import sounddevice as sd
 import numpy as np
+import sounddevice as sd
 import time
 import os
 import wave
@@ -15,12 +15,21 @@ def play_streaming_tts(text: str, output_file: str = None, voice: str = "af"):
     # Initialize variables
     sample_rate = 24000  # Known sample rate for Kokoro
     audio_started = False
-    stream = None
     chunk_count = 0
     total_bytes = 0
     first_chunk_time = None
     all_audio_data = bytearray()  # Raw PCM audio data
 
+    # Start sounddevice stream with buffer
+    stream = sd.OutputStream(
+        samplerate=sample_rate,
+        channels=1,
+        dtype=np.int16,
+        blocksize=1024,  # Buffer size in samples
+        latency='low'  # Request low latency
+    )
+    stream.start()
+
     # Make streaming request to API
     try:
         response = requests.post(
@@ -38,8 +47,8 @@ def play_streaming_tts(text: str, output_file: str = None, voice: str = "af"):
         response.raise_for_status()
         print(f"Request started successfully after {time.time() - start_time:.2f}s")
 
-        # Process streaming response
-        for chunk in response.iter_content(chunk_size=1024):
+        # Process streaming response with smaller chunks for lower latency
+        for chunk in response.iter_content(chunk_size=512):  # 512 bytes = 256 samples at 16-bit
             if chunk:
                 chunk_count += 1
                 total_bytes += len(chunk)
@@ -49,40 +58,15 @@ def play_streaming_tts(text: str, output_file: str = None, voice: str = "af"):
                     first_chunk_time = time.time()
                     print(f"\nReceived first chunk after {first_chunk_time - start_time:.2f}s")
                     print(f"First chunk size: {len(chunk)} bytes")
-
-                    # Accumulate raw audio data
-                    all_audio_data.extend(chunk)
-
-                    # Convert PCM to float32 for playback
-                    audio_data = np.frombuffer(chunk, dtype=np.int16).astype(np.float32)
-                    # Scale to [-1, 1] range for sounddevice
-                    audio_data = audio_data / 32768.0
-
-                    # Start audio stream
-                    stream = sd.OutputStream(
-                        samplerate=sample_rate,
-                        channels=1,
-                        dtype=np.float32
-                    )
-                    stream.start()
                     audio_started = True
-                    print("Audio playback started")
-
-                    # Play first chunk
-                    if len(audio_data) > 0:
-                        stream.write(audio_data)
-
-                # Handle subsequent chunks
-                else:
-                    # Accumulate raw audio data
-                    all_audio_data.extend(chunk)
-
-                    # Convert PCM to float32 for playback
-                    audio_data = np.frombuffer(chunk, dtype=np.int16).astype(np.float32)
-                    audio_data = audio_data / 32768.0
-                    if len(audio_data) > 0:
-                        stream.write(audio_data)
-
+
+                # Convert bytes to numpy array and play
+                audio_chunk = np.frombuffer(chunk, dtype=np.int16)
+                stream.write(audio_chunk)
+
+                # Accumulate raw audio data
+                all_audio_data.extend(chunk)
+
                 # Log progress every 10 chunks
                 if chunk_count % 10 == 0:
                     elapsed = time.time() - start_time
@@ -107,20 +91,17 @@ def play_streaming_tts(text: str, output_file: str = None, voice: str = "af"):
             print(f"Saved {len(all_audio_data)} bytes of audio data")
 
         # Clean up
-        if stream is not None:
-            stream.stop()
-            stream.close()
+        stream.stop()
+        stream.close()
 
     except requests.exceptions.ConnectionError as e:
         print(f"Connection error - Is the server running? Error: {str(e)}")
-        if stream is not None:
-            stream.stop()
-            stream.close()
+        stream.stop()
+        stream.close()
     except Exception as e:
         print(f"Error during streaming: {str(e)}")
-        if stream is not None:
-            stream.stop()
-            stream.close()
+        stream.stop()
+        stream.close()
 
 def main():
     # Load sample text from HG Wells
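
The most user-visible change in the patch is in api/src/routers/openai_compatible.py: the /audio/speech endpoint now decides whether to stream based on an x-raw-response: stream request header rather than the stream field of the request body. A minimal client-side sketch follows; the base URL, payload values, and output filename are illustrative assumptions drawn from the examples above (local server on port 8880, model "kokoro", voice "af"), and only the endpoint path plus the header name and value come from the diff itself.

#!/usr/bin/env python3
# Sketch: trigger streaming from the OpenAI-compatible endpoint via the x-raw-response header.
import requests

response = requests.post(
    "http://localhost:8880/v1/audio/speech",  # assumed local server, as in the examples
    json={
        "model": "kokoro",
        "voice": "af",
        "input": "Hello from the streaming endpoint.",
        "response_format": "pcm",  # raw 16-bit PCM at 24000 Hz
    },
    headers={"x-raw-response": "stream"},  # without this header the endpoint returns a complete body
    stream=True,
)
response.raise_for_status()

with open("speech.pcm", "wb") as f:  # illustrative output path
    for chunk in response.iter_content(chunk_size=1024):
        if chunk:
            f.write(chunk)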
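The AudioNormalizer rewrite in api/src/services/audio.py changes the scaling strategy: each chunk is peak-normalized to [-1, 1] and then multiplied by the int16 maximum, and AudioService now applies a normalizer even when none is passed in. Below is a standalone sketch of the new scaling with illustrative sample values; int16_max is assumed to be 32767, and the free function stands in for the class method.

import numpy as np

int16_max = np.iinfo(np.int16).max  # 32767

def normalize(audio_data: np.ndarray) -> np.ndarray:
    # Peak-normalize to [-1, 1], then scale to the int16 range, mirroring the new normalize().
    audio_float = audio_data.astype(np.float32)
    peak = np.max(np.abs(audio_float))
    if peak > 0:
        audio_float = audio_float / peak
    return (audio_float * int16_max).astype(np.int16)

# A quiet chunk is boosted so its loudest sample reaches full scale:
quiet = np.array([0.0, 0.25, -0.5], dtype=np.float32)
print(normalize(quiet))  # [0, 16383, -32767]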