diff --git a/.coverage b/.coverage
deleted file mode 100644
index 42652cc..0000000
Binary files a/.coverage and /dev/null differ
diff --git a/README.md b/README.md
index 2e38852..8f83b33 100644
--- a/README.md
+++ b/README.md
@@ -129,7 +129,7 @@ response = requests.post(
 )
 ```
 <p align="center">
-  <img src="examples/benchmarks/analysis_comparison.png" width="80%" alt="Voice Analysis Comparison" style="border: 2px solid #333; padding: 10px;">
+  <img src="assets/voice_analysis.png" width="80%" alt="Voice Analysis Comparison" style="border: 2px solid #333; padding: 10px;">
 </p>
 </details>
 
@@ -144,7 +144,7 @@ response = requests.post(
 - pcm
 
 <p align="center">
-<img src="examples/benchmarks/format_comparison.png" width="80%" alt="Audio Format Comparison" style="border: 2px solid #333; padding: 10px;">
+<img src="assets/format_comparison.png" width="80%" alt="Audio Format Comparison" style="border: 2px solid #333; padding: 10px;">
 </p>
 
 </details>
@@ -175,8 +175,8 @@ Benchmarking was performed on generation via the local API using text lengths up
 - H.G. Wells - The Time Machine (full text)
 
 <p align="center">
-  <img src="examples/benchmarks/processing_time.png" width="45%" alt="Processing Time" style="border: 2px solid #333; padding: 10px; margin-right: 1%;">
-  <img src="examples/benchmarks/realtime_factor.png" width="45%" alt="Realtime Factor" style="border: 2px solid #333; padding: 10px;">
+  <img src="assets/gpu_processing_time.png" width="45%" alt="Processing Time" style="border: 2px solid #333; padding: 10px; margin-right: 1%;">
+  <img src="assets/gpu_realtime_factor.png" width="45%" alt="Realtime Factor" style="border: 2px solid #333; padding: 10px;">
 </p>
 
 Key Performance Metrics:
diff --git a/api/src/core/config.py b/api/src/core/config.py
index 5348730..ad0ef1c 100644
--- a/api/src/core/config.py
+++ b/api/src/core/config.py
@@ -18,6 +18,8 @@ class Settings(BaseSettings):
     onnx_model_path: str = "kokoro-v0_19.onnx"
     voices_dir: str = "voices"
     sample_rate: int = 24000
+    max_chunk_size: int = 300  # Maximum size of text chunks for processing
+    gap_trim_ms: int = 250  # Amount to trim from streaming chunk ends in milliseconds
     
     # ONNX Optimization Settings
     onnx_num_threads: int = 4  # Number of threads for intra-op parallelism
diff --git a/api/src/core/don_quixote.txt b/api/src/core/don_quixote.txt
new file mode 100644
index 0000000..a32a3a6
--- /dev/null
+++ b/api/src/core/don_quixote.txt
@@ -0,0 +1,9 @@
+In a village of La Mancha, the name of which I have no desire to call
+to mind, there lived not long since one of those gentlemen that keep a
+lance in the lance-rack, an old buckler, a lean hack, and a greyhound
+for coursing. An olla of rather more beef than mutton, a salad on most
+nights, scraps on Saturdays, lentils on Fridays, and a pigeon or so
+extra on Sundays, made away with three-quarters of his income. The rest
+of it went in a doublet of fine cloth and velvet breeches and shoes to
+match for holidays, while on week-days he made a brave figure in his
+best homespun. 
\ No newline at end of file
diff --git a/api/src/main.py b/api/src/main.py
index 4603e1f..fc51043 100644
--- a/api/src/main.py
+++ b/api/src/main.py
@@ -22,10 +22,11 @@ async def lifespan(app: FastAPI):
     logger.info("Loading TTS model and voice packs...")
 
     # Initialize the main model with warm-up
-    voicepack_count = TTSModel.setup()
+    voicepack_count = await TTSModel.setup()
     # boundary = "█████╗"*9
-    boundary = "░" * 30
+    boundary = "░" * 24
     startup_msg =f"""
+
 {boundary}
 
     ╔═╗┌─┐┌─┐┌┬┐
@@ -37,8 +38,9 @@ async def lifespan(app: FastAPI):
 
 {boundary}
                 """
-    startup_msg += f"\nModel loaded and warmed up on {TTSModel.get_device()}"
-    startup_msg += f"\n{voicepack_count} voice packs loaded successfully\n"
+    # TODO: Improve CPU warmup, threads, memory, etc
+    startup_msg += f"\nModel warmed up on {TTSModel.get_device()}"
+    startup_msg += f"\n{voicepack_count} voice packs loaded\n"
     startup_msg += f"\n{boundary}\n"
     logger.info(startup_msg)
 
diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py
index 5d465f5..2b30c7a 100644
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@@ -83,8 +83,8 @@ async def create_speech(
                 audio, 
                 24000, 
                 request.response_format,
-                is_first_chunk=True
-            )
+                is_first_chunk=True,
+                stream=False)
 
             return Response(
                 content=content,
diff --git a/api/src/services/audio.py b/api/src/services/audio.py
index e13d91f..dcb2a72 100644
--- a/api/src/services/audio.py
+++ b/api/src/services/audio.py
@@ -4,22 +4,30 @@ from io import BytesIO
 
 import numpy as np
 import soundfile as sf
+import scipy.io.wavfile as wavfile
 from loguru import logger
-
+from ..core.config import settings
 
 class AudioNormalizer:
     """Handles audio normalization state for a single stream"""
     def __init__(self):
         self.int16_max = np.iinfo(np.int16).max
+        self.chunk_trim_ms = settings.gap_trim_ms  # Trim length in milliseconds, from config
+        self.sample_rate = 24000  # Sample rate of the audio
+        self.samples_to_trim = int(self.chunk_trim_ms * self.sample_rate / 1000)
     
-    def normalize(self, audio_data: np.ndarray) -> np.ndarray:
-        """Normalize audio data to int16 range"""
+    def normalize(self, audio_data: np.ndarray, is_last_chunk: bool = False) -> np.ndarray:
+        """Peak-normalize audio to the int16 range; non-final chunks lose their last samples_to_trim samples"""
         # Convert to float32 if not already
         audio_float = audio_data.astype(np.float32)
         
         # Normalize to [-1, 1] range first
         if np.max(np.abs(audio_float)) > 0:
             audio_float = audio_float / np.max(np.abs(audio_float))
+        
+        # Trim end of non-final chunks to reduce gaps; skip a zero/negative trim, since [:-0] would empty the chunk
+        if not is_last_chunk and 0 < self.samples_to_trim < len(audio_float):
+            audio_float = audio_float[:-self.samples_to_trim]
             
         # Scale to int16 range
         return (audio_float * self.int16_max).astype(np.int16)
@@ -27,13 +35,30 @@ class AudioNormalizer:
 class AudioService:
     """Service for audio format conversions"""
     
+    # Default audio format settings, chosen for encoding speed over output size
+    DEFAULT_SETTINGS = {
+        "mp3": {
+            "bitrate_mode": "CONSTANT",  # Faster than variable bitrate
+            "compression_level": 0.0,  # 0.0 = minimum compression level
+        },
+        "opus": {
+            "compression_level": 0.0,  # 0.0 = minimum compression level
+        },
+        "flac": {
+            "compression_level": 0.0,  # 0.0 = minimum compression level
+        }
+    }
+    
     @staticmethod
     def convert_audio(
         audio_data: np.ndarray, 
         sample_rate: int, 
         output_format: str, 
         is_first_chunk: bool = True,
-        normalizer: AudioNormalizer = None
+        is_last_chunk: bool = False,
+        normalizer: AudioNormalizer = None,
+        format_settings: dict = None,
+        stream: bool = True
     ) -> bytes:
         """Convert audio data to specified format
 
@@ -42,6 +67,19 @@ class AudioService:
             sample_rate: Sample rate of the audio
             output_format: Target format (wav, mp3, opus, flac, pcm)
             is_first_chunk: Whether this is the first chunk of a stream
+            normalizer: Optional AudioNormalizer instance for consistent normalization across chunks
+            format_settings: Optional dict of format-specific settings to override defaults
+                Example: {
+                    "mp3": {
+                        "bitrate_mode": "VARIABLE",
+                        "compression_level": 0.8
+                    }
+                }
+                Defaults (see DEFAULT_SETTINGS) use compression level 0.0,
+                the minimum, which is tuned for localhost use:
+                - MP3: constant bitrate, compression level 0.0
+                - OPUS: compression level 0.0
+                - FLAC: compression level 0.0
 
         Returns:
             Bytes of the converted audio
@@ -50,31 +88,48 @@ class AudioService:
 
         try:
             # Always normalize audio to ensure proper amplitude scaling
-            if normalizer is None:
-                normalizer = AudioNormalizer()
-            normalized_audio = normalizer.normalize(audio_data)
+            if stream:
+                if normalizer is None:
+                    normalizer = AudioNormalizer()
+                normalized_audio = normalizer.normalize(audio_data, is_last_chunk=is_last_chunk)
+            else:
+                normalized_audio = audio_data
 
             if output_format == "pcm":
-                logger.info("Writing PCM data...")
                 # Raw 16-bit PCM samples, no header
                 buffer.write(normalized_audio.tobytes())
             elif output_format == "wav":
-                logger.info("Writing to WAV format...")
-                # Always include WAV header for WAV format
-                sf.write(buffer, normalized_audio, sample_rate, format="WAV", subtype='PCM_16')
+                if stream:
+                    # Use soundfile for streaming to ensure proper headers
+                    sf.write(buffer, normalized_audio, sample_rate, format="WAV", subtype='PCM_16')
+                else:
+                    # Non-streaming WAV: scipy.io.wavfile is used here because it
+                    # seemed faster than soundfile; it writes the array in its
+                    # existing dtype (normalization is skipped when stream=False)
+                    wavfile.write(buffer, sample_rate, normalized_audio)
             elif output_format == "mp3":
-                logger.info("Converting to MP3 format...")
-                # Use lower bitrate for streaming
-                sf.write(buffer, normalized_audio, sample_rate, format="MP3")
+                # Use format settings or defaults
+                settings = format_settings.get("mp3", {}) if format_settings else {}
+                settings = {**AudioService.DEFAULT_SETTINGS["mp3"], **settings}
+                sf.write(
+                    buffer, normalized_audio, 
+                    sample_rate, format="MP3",
+                    **settings
+                    )
+                
             elif output_format == "opus":
-                logger.info("Converting to Opus format...")
-                # Use lower bitrate and smaller frame size for streaming
-                sf.write(buffer, normalized_audio, sample_rate, format="OGG", subtype="OPUS")
+                settings = format_settings.get("opus", {}) if format_settings else {}
+                settings = {**AudioService.DEFAULT_SETTINGS["opus"], **settings}
+                sf.write(buffer, normalized_audio, sample_rate, format="OGG", 
+                        subtype="OPUS", **settings)
+                
             elif output_format == "flac":
-                logger.info("Converting to FLAC format...")
-                # Use smaller block size for streaming
+                if is_first_chunk:
+                    logger.info("Starting FLAC stream...")
+                settings = format_settings.get("flac", {}) if format_settings else {}
+                settings = {**AudioService.DEFAULT_SETTINGS["flac"], **settings}
                 sf.write(buffer, normalized_audio, sample_rate, format="FLAC",
-                        subtype='PCM_16')
+                        subtype='PCM_16', **settings)
             else:
                 if output_format == "aac":
                     raise ValueError(
diff --git a/api/src/services/text_processing/chunker.py b/api/src/services/text_processing/chunker.py
new file mode 100644
index 0000000..c0c59eb
--- /dev/null
+++ b/api/src/services/text_processing/chunker.py
@@ -0,0 +1,52 @@
+"""Text chunking service"""
+
+import re
+from ...core.config import settings
+
+
+def split_text(text: str, max_chunk=None):
+    """Split text into chunks on natural pause points
+    
+    Args:
+        text: Text to split into chunks
+        max_chunk: Maximum chunk size (defaults to settings.max_chunk_size)
+    """
+    if max_chunk is None:
+        max_chunk = settings.max_chunk_size
+        
+    if not isinstance(text, str):
+        text = str(text) if text is not None else ""
+        
+    text = text.strip()
+    if not text:
+        return
+        
+    # First split into sentences
+    sentences = re.split(r"(?<=[.!?])\s+", text)
+    
+    for sentence in sentences:
+        sentence = sentence.strip()
+        if not sentence:
+            continue
+            
+        # Sentences longer than max_chunk are split further on punctuation
+        if len(sentence) > max_chunk:  # max_chunk defaults to settings.max_chunk_size
+            # First try splitting on semicolons and colons
+            parts = re.split(r"(?<=[;:])\s+", sentence)
+            
+            for part in parts:
+                part = part.strip()
+                if not part:
+                    continue
+                    
+                # If part is still long, split on commas (subparts are not re-checked against max_chunk)
+                if len(part) > max_chunk:
+                    subparts = re.split(r"(?<=,)\s+", part)
+                    for subpart in subparts:
+                        subpart = subpart.strip()
+                        if subpart:
+                            yield subpart
+                else:
+                    yield part
+        else:
+            yield sentence
diff --git a/api/src/services/tts_base.py b/api/src/services/tts_base.py
index f502373..16e8462 100644
--- a/api/src/services/tts_base.py
+++ b/api/src/services/tts_base.py
@@ -15,7 +15,7 @@ class TTSBaseModel(ABC):
     VOICES_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "voices")
 
     @classmethod
-    def setup(cls):
+    async def setup(cls):
         """Initialize model and setup voices"""
         with cls._lock:
             # Set device
@@ -59,19 +59,23 @@ class TTSBaseModel(ABC):
                             except Exception as e:
                                 logger.error(f"Error copying voice {voice_name}: {str(e)}")
 
-            # Warm up with default voice
+            # Load warmup text
             try:
-                dummy_text = "Hello"
-                voice_path = os.path.join(cls.VOICES_DIR, "af.pt")
-                dummy_voicepack = torch.load(voice_path, map_location=cls._device, weights_only=True)
-                
-                # Process text and generate audio
-                phonemes, tokens = cls.process_text(dummy_text, "a")
-                cls.generate_from_tokens(tokens, dummy_voicepack, 1.0)
-                
-                logger.info("Model warm-up complete")
+                with open(os.path.join(os.path.dirname(os.path.dirname(__file__)), "core", "don_quixote.txt")) as f:
+                    warmup_text = f.read()
             except Exception as e:
-                logger.warning(f"Model warm-up failed: {e}")
+                logger.warning(f"Failed to load warmup text: {e}")
+                warmup_text = "This is a warmup text that will be split into chunks for processing."
+
+            # Use warmup service
+            from .warmup import WarmupService
+            warmup = WarmupService()
+            
+            # Load and warm up voices. NOTE(review): awaited while cls._lock appears to be held - confirm nothing re-acquires it
+            loaded_voices = warmup.load_voices()
+            await warmup.warmup_voices(warmup_text, loaded_voices)
+            
+            logger.info("Model warm-up complete")
 
             # Count voices in directory
             voice_count = len([f for f in os.listdir(cls.VOICES_DIR) if f.endswith(".pt")])
diff --git a/api/src/services/tts_gpu.py b/api/src/services/tts_gpu.py
index 300d141..51c8424 100644
--- a/api/src/services/tts_gpu.py
+++ b/api/src/services/tts_gpu.py
@@ -1,6 +1,7 @@
 import os
 import numpy as np
 import torch
+import time
 from loguru import logger
 from models import build_model
 from .text_processing import phonemize, tokenize
@@ -8,42 +9,97 @@ from .text_processing import phonemize, tokenize
 from .tts_base import TTSBaseModel
 from ..core.config import settings
 
+# @torch.no_grad()
+# def forward(model, tokens, ref_s, speed):
+#     """Forward pass through the model"""
+#     device = ref_s.device
+#     tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
+#     input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
+#     text_mask = length_to_mask(input_lengths).to(device)
+#     bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
+#     d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
+#     s = ref_s[:, 128:]
+#     d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
+#     x, _ = model.predictor.lstm(d)
+#     duration = model.predictor.duration_proj(x)
+#     duration = torch.sigmoid(duration).sum(axis=-1) / speed
+#     pred_dur = torch.round(duration).clamp(min=1).long()
+#     pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item())
+#     c_frame = 0
+#     for i in range(pred_aln_trg.size(0)):
+#         pred_aln_trg[i, c_frame : c_frame + pred_dur[0, i].item()] = 1
+#         c_frame += pred_dur[0, i].item()
+#     en = d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device)
+#     F0_pred, N_pred = model.predictor.F0Ntrain(en, s)
+#     t_en = model.text_encoder(tokens, input_lengths, text_mask)
+#     asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
+#     return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
 @torch.no_grad()
 def forward(model, tokens, ref_s, speed):
-    """Forward pass through the model"""
+    """Forward pass through the model with light optimizations that preserve output quality"""
     device = ref_s.device
+    
+    # Pad the token sequence with 0 at both ends and move it to the reference tensor's device
     tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
     input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
     text_mask = length_to_mask(input_lengths).to(device)
+    
+    # BERT and encoder pass
     bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
     d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
-    s = ref_s[:, 128:]
-    d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
+    
+    # Slice the reference tensor once: columns 128+ go to the predictor, columns 0-127 to the decoder
+    s_content = ref_s[:, 128:]
+    s_ref = ref_s[:, :128]
+    
+    # Predictor forward pass
+    d = model.predictor.text_encoder(d_en, s_content, input_lengths, text_mask)
     x, _ = model.predictor.lstm(d)
+    
+    # Duration prediction - keeping original logic
     duration = model.predictor.duration_proj(x)
     duration = torch.sigmoid(duration).sum(axis=-1) / speed
     pred_dur = torch.round(duration).clamp(min=1).long()
-    pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item())
+    
+    # Alignment matrix construction - keeping original approach for quality
+    pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item(), device=device)
     c_frame = 0
     for i in range(pred_aln_trg.size(0)):
-        pred_aln_trg[i, c_frame : c_frame + pred_dur[0, i].item()] = 1
+        pred_aln_trg[i, c_frame:c_frame + pred_dur[0, i].item()] = 1
         c_frame += pred_dur[0, i].item()
-    en = d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device)
-    F0_pred, N_pred = model.predictor.F0Ntrain(en, s)
+    
+    # Matrix multiplications - reuse unsqueezed tensor
+    pred_aln_trg = pred_aln_trg.unsqueeze(0)  # Do unsqueeze once
+    en = d.transpose(-1, -2) @ pred_aln_trg
+    F0_pred, N_pred = model.predictor.F0Ntrain(en, s_content)
+    
+    # Text encoding and final decoding
     t_en = model.text_encoder(tokens, input_lengths, text_mask)
-    asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
-    return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
+    asr = t_en @ pred_aln_trg
+    
+    return model.decoder(asr, F0_pred, N_pred, s_ref).squeeze().cpu().numpy()
+
+# def length_to_mask(lengths):
+#     """Create attention mask from lengths"""
+#     mask = (
+#         torch.arange(lengths.max())
+#         .unsqueeze(0)
+#         .expand(lengths.shape[0], -1)
+#         .type_as(lengths)
+#     )
+#     mask = torch.gt(mask + 1, lengths.unsqueeze(1))
+#     return mask
 
 def length_to_mask(lengths):
-    """Create attention mask from lengths"""
-    mask = (
-        torch.arange(lengths.max())
-        .unsqueeze(0)
-        .expand(lengths.shape[0], -1)
-        .type_as(lengths)
-    )
-    mask = torch.gt(mask + 1, lengths.unsqueeze(1))
-    return mask
+    """Create a boolean mask from lengths: True where position index >= length"""
+    max_len = lengths.max()
+    # Create mask directly on the same device as lengths
+    mask = torch.arange(max_len, device=lengths.device)[None, :].expand(lengths.shape[0], -1)
+    # Cast only when dtypes differ (replaces the previous unconditional type_as)
+    if lengths.dtype != mask.dtype:
+        mask = mask.to(dtype=lengths.dtype)
+    # Broadcast compare: (index + 1) > length marks positions past each sequence's end
+    return mask + 1 > lengths[:, None]
 
 class TTSGPUModel(TTSBaseModel):
     _instance = None
diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py
index fbd20e6..8e2aed6 100644
--- a/api/src/services/tts_service.py
+++ b/api/src/services/tts_service.py
@@ -8,7 +8,7 @@ from functools import lru_cache
 import numpy as np
 import torch
 import scipy.io.wavfile as wavfile
-from .text_processing import normalize_text
+from .text_processing import normalize_text, chunker
 from loguru import logger
 
 from ..core.config import settings
@@ -20,40 +20,6 @@ class TTSService:
     def __init__(self, output_dir: str = None):
         self.output_dir = output_dir
 
-    def _split_text(self, text: str):
-        """Generate text chunks one at a time, splitting on natural pause points"""
-        if not isinstance(text, str):
-            text = str(text) if text is not None else ""
-            
-        # First split into sentences
-        sentences = re.split(r"(?<=[.!?])\s+", text)
-        
-        for sentence in sentences:
-            sentence = sentence.strip()
-            if not sentence:
-                continue
-                
-            # For longer sentences, split on commas and semicolons
-            if len(sentence) > 300:  # Only split long sentences
-                # Split on pause points while preserving the punctuation
-                chunks = re.split(r"((?<=[,;])\s+)", sentence)
-                
-                # Reassemble chunks with their trailing punctuation
-                current_chunk = ""
-                for i, chunk in enumerate(chunks):
-                    if i % 2 == 0:  # Text chunk
-                        current_chunk += chunk
-                    else:  # Punctuation/whitespace chunk
-                        current_chunk += chunk
-                        if current_chunk.strip():
-                            yield current_chunk.strip()
-                        current_chunk = ""
-                        
-                # Yield any remaining text
-                if current_chunk.strip():
-                    yield current_chunk.strip()
-            else:
-                yield sentence
 
     @staticmethod
     @lru_cache(maxsize=20)  # Cache up to 8 most recently used voices
@@ -96,28 +62,32 @@ class TTSService:
             # Load voice using cached loader
             voicepack = self._load_voice(voice_path)
 
-            # Generate audio with or without stitching
+            # For non-streaming, preprocess all chunks first
             if stitch_long_output:
-                audio_chunks = []
-                chunk_count = 0
-
-                # Process chunks as they're generated
-                for chunk in self._split_text(text):
+                # Preprocess all chunks to phonemes/tokens
+                chunks_data = []
+                for chunk in chunker.split_text(text):
                     try:
-                        # Process text and generate audio
                         phonemes, tokens = TTSModel.process_text(chunk, voice[0])
+                        chunks_data.append((chunk, tokens))
+                    except Exception as e:
+                        logger.error(f"Failed to process chunk: '{chunk}'. Error: {str(e)}")
+                        continue
+
+                if not chunks_data:
+                    raise ValueError("No chunks were processed successfully")
+
+                # Generate audio for all chunks
+                audio_chunks = []
+                for chunk, tokens in chunks_data:
+                    try:
                         chunk_audio = TTSModel.generate_from_tokens(tokens, voicepack, speed)
-    
                         if chunk_audio is not None:
                             audio_chunks.append(chunk_audio)
-                            chunk_count += 1
                         else:
-                            logger.error(f"No audio generated for chunk {chunk_count + 1}")
-                            
+                            logger.error(f"No audio generated for chunk: '{chunk}'")
                     except Exception as e:
-                        logger.error(
-                            f"Failed to generate audio for chunk {chunk_count + 1}: '{chunk}'. Error: {str(e)}"
-                        )
+                        logger.error(f"Failed to generate audio for chunk: '{chunk}'. Error: {str(e)}")
                         continue
 
                 if not audio_chunks:
@@ -138,53 +108,93 @@ class TTSService:
             raise
 
     async def generate_audio_stream(
-        self, text: str, voice: str, speed: float, output_format: str = "wav"
+        self, text: str, voice: str, speed: float, output_format: str = "wav", silent=False
     ):
         """Generate and yield audio chunks as they're generated for real-time streaming"""
         try:
+            stream_start = time.time()
             # Create normalizer for consistent audio levels
             stream_normalizer = AudioNormalizer()
             
             # Input validation and preprocessing
             if not text:
                 raise ValueError("Text is empty")
+            preprocess_start = time.time()
             normalized = normalize_text(text)
             if not normalized:
                 raise ValueError("Text is empty after preprocessing")
             text = str(normalized)
+            logger.debug(f"Text preprocessing took: {(time.time() - preprocess_start)*1000:.1f}ms")
 
             # Voice validation and loading
+            voice_start = time.time()
             voice_path = self._get_voice_path(voice)
             if not voice_path:
                 raise ValueError(f"Voice not found: {voice}")
             voicepack = self._load_voice(voice_path)
+            logger.debug(f"Voice loading took: {(time.time() - voice_start)*1000:.1f}ms")
 
             # Process chunks as they're generated
             is_first = True
-            for chunk in self._split_text(text):
+            chunks_processed = 0
+            # last_chunk_end = time.time()
+            
+            # Process chunks as they come from generator
+            chunk_gen = chunker.split_text(text)
+            current_chunk = next(chunk_gen, None)
+            
+            while current_chunk is not None:
+                next_chunk = next(chunk_gen, None)  # Peek at next chunk
+                # chunk_start = time.time()
+                chunks_processed += 1
                 try:
                     # Process text and generate audio
-                    phonemes, tokens = TTSModel.process_text(chunk, voice[0])
+                    # text_process_start = time.time()
+                    phonemes, tokens = TTSModel.process_text(current_chunk, voice[0])
+                    # text_process_time = time.time() - text_process_start
+                    
+                    # audio_gen_start = time.time()
                     chunk_audio = TTSModel.generate_from_tokens(tokens, voicepack, speed)
-
+                    # audio_gen_time = time.time() - audio_gen_start
+                    
                     if chunk_audio is not None:
                         # Convert chunk with proper header handling
+                        convert_start = time.time()
                         chunk_bytes = AudioService.convert_audio(
                             chunk_audio,
                             24000,
                             output_format,
                             is_first_chunk=is_first,
-                            normalizer=stream_normalizer
+                            normalizer=stream_normalizer,
+                            is_last_chunk=(next_chunk is None)  # Last if no next chunk
                         )
+                        # convert_time = time.time() - convert_start
+                        
+                        # Calculate gap from last chunk
+                        # gap_time = chunk_start - last_chunk_end
+                        
+                        # Log timing details if not silent
+                        # if not silent:
+                        #     logger.debug(
+                        #         f"\nChunk {chunks_processed} timing:"
+                        #         f"\n  Gap from last chunk: {gap_time*1000:.1f}ms"
+                        #         f"\n  Text processing: {text_process_time*1000:.1f}ms"
+                        #         f"\n  Audio generation: {audio_gen_time*1000:.1f}ms"
+                        #         f"\n  Audio conversion: {convert_time*1000:.1f}ms"
+                        #         f"\n  Total chunk time: {(time.time() - chunk_start)*1000:.1f}ms"
+                        #     )
+                        
                         yield chunk_bytes
                         is_first = False
+                        # last_chunk_end = time.time()
                     else:
-                        logger.error(f"No audio generated for chunk: '{chunk}'")
+                        logger.error(f"No audio generated for chunk: '{current_chunk}'")
 
                 except Exception as e:
-                    logger.error(f"Failed to generate audio for chunk: '{chunk}'. Error: {str(e)}")
-                    continue
-
+                    logger.error(f"Failed to generate audio for chunk: '{current_chunk}'. Error: {str(e)}")
+                
+                current_chunk = next_chunk  # Move to next chunk
+                
         except Exception as e:
             logger.error(f"Error in audio generation stream: {str(e)}")
             raise
diff --git a/api/src/services/warmup.py b/api/src/services/warmup.py
new file mode 100644
index 0000000..67937dd
--- /dev/null
+++ b/api/src/services/warmup.py
@@ -0,0 +1,52 @@
+import os
+from typing import List, Tuple
+import torch
+from loguru import logger
+
+from .tts_service import TTSService
+from .tts_model import TTSModel
+
+
+class WarmupService:
+    """Service for warming up TTS models and voice caches"""
+    
+    def __init__(self):
+        self.tts_service = TTSService()
+        
+    def load_voices(self) -> List[Tuple[str, torch.Tensor]]:
+        """Load up to 20 voice tensors from TTSModel.VOICES_DIR as (name, tensor) pairs"""
+        # Get all voices sorted by filename length (shorter names first, usually base voices)
+        voice_files = sorted(
+            [f for f in os.listdir(TTSModel.VOICES_DIR) if f.endswith(".pt")],
+            key=len
+        )
+        
+        # Load at most 20 voices (the intended LRU cache limit), shortest filenames first
+        loaded_voices = []
+        for voice_file in voice_files[:20]:
+            try:
+                voice_path = os.path.join(TTSModel.VOICES_DIR, voice_file)
+                voicepack = torch.load(voice_path, map_location=TTSModel.get_device(), weights_only=True)
+                loaded_voices.append((voice_file[:-3], voicepack))  # Store name and tensor
+                # NOTE(review): torch.load is called directly here; presumably this does not fill TTSService's lru_cache - confirm
+            except Exception as e:
+                logger.error(f"Failed to load voice {voice_file}: {e}")
+        logger.info(f"Pre-loaded {len(loaded_voices)} voices into cache")
+        return loaded_voices
+        
+    async def warmup_voices(self, warmup_text: str, loaded_voices: List[Tuple[str, torch.Tensor]]):
+        """Warm up voice inference and streaming"""
+        n_warmups = 1
+        for voice_name, _ in loaded_voices[:n_warmups]:
+            try:
+                logger.info(f"Running warmup inference on voice {voice_name}")
+                async for _ in self.tts_service.generate_audio_stream(
+                    warmup_text,
+                    voice_name,
+                    1.0,
+                    "pcm"
+                ):
+                    pass  # Process all chunks to properly warm up
+                logger.info(f"Completed warmup for voice {voice_name}")
+            except Exception as e:
+                logger.warning(f"Warmup failed for voice {voice_name}: {e}")
diff --git a/api/tests/test_chunker.py b/api/tests/test_chunker.py
new file mode 100644
index 0000000..ed598c0
--- /dev/null
+++ b/api/tests/test_chunker.py
@@ -0,0 +1,35 @@
+"""Tests for text chunking service"""
+
+import pytest
+from api.src.services.text_processing import chunker
+
+
+def test_split_text():
+    """Test text splitting into sentences"""
+    text = "First sentence. Second sentence! Third sentence?"
+    sentences = list(chunker.split_text(text))
+    assert len(sentences) == 3
+    assert sentences[0] == "First sentence."
+    assert sentences[1] == "Second sentence!"
+    assert sentences[2] == "Third sentence?"
+
+
+def test_split_text_empty():
+    """Test splitting empty text"""
+    assert list(chunker.split_text("")) == []
+
+
+def test_split_text_single_sentence():
+    """Test splitting single sentence"""
+    text = "Just one sentence."
+    assert list(chunker.split_text(text)) == ["Just one sentence."]
+
+
+def test_split_text_with_custom_chunk_size():
+    """Test splitting with custom max chunk size"""
+    text = "First part, second part, third part."
+    chunks = list(chunker.split_text(text, max_chunk=15))
+    assert len(chunks) == 3
+    assert chunks[0] == "First part,"
+    assert chunks[1] == "second part,"
+    assert chunks[2] == "third part."
diff --git a/api/tests/test_endpoints.py b/api/tests/test_endpoints.py
index 6142e12..4827adc 100644
--- a/api/tests/test_endpoints.py
+++ b/api/tests/test_endpoints.py
@@ -1,7 +1,8 @@
-from unittest.mock import Mock
+from unittest.mock import Mock, AsyncMock
 
 import pytest
 import pytest_asyncio
+import asyncio
 from fastapi.testclient import TestClient
 from httpx import AsyncClient
 
@@ -22,6 +23,12 @@ async def async_client():
 def mock_tts_service(monkeypatch):
     mock_service = Mock()
     mock_service._generate_audio.return_value = (bytes([0, 1, 2, 3]), 1.0)
+    
+    # Create proper async generator mock
+    async def mock_stream(*args, **kwargs):
+        for chunk in [b"chunk1", b"chunk2"]:
+            yield chunk
+    mock_service.generate_audio_stream = mock_stream
     mock_service.list_voices.return_value = [
         "af",
         "bm_lewis",
@@ -65,6 +72,7 @@ def test_openai_speech_endpoint(mock_tts_service, mock_audio_service):
         "voice": "bm_lewis",
         "response_format": "wav",
         "speed": 1.0,
+        "stream": False  # Explicitly disable streaming
     }
     response = client.post("/v1/audio/speech", json=test_request)
     assert response.status_code == 200
@@ -84,6 +92,7 @@ def test_openai_speech_invalid_voice(mock_tts_service):
         "voice": "invalid_voice",
         "response_format": "wav",
         "speed": 1.0,
+        "stream": False  # Explicitly disable streaming
     }
     response = client.post("/v1/audio/speech", json=test_request)
     assert response.status_code == 400  # Bad request
@@ -98,6 +107,7 @@ def test_openai_speech_invalid_speed(mock_tts_service):
         "voice": "af",
         "response_format": "wav",
         "speed": -1.0,  # Invalid speed
+        "stream": False  # Explicitly disable streaming
     }
     response = client.post("/v1/audio/speech", json=test_request)
     assert response.status_code == 422  # Validation error
@@ -112,6 +122,7 @@ def test_openai_speech_generation_error(mock_tts_service):
         "voice": "af",
         "response_format": "wav",
         "speed": 1.0,
+        "stream": False  # Explicitly disable streaming
     }
     response = client.post("/v1/audio/speech", json=test_request)
     assert response.status_code == 500
@@ -171,13 +182,14 @@ async def test_openai_speech_pcm_streaming(mock_tts_service, async_client):
         "input": "Hello world",
         "voice": "af",
         "response_format": "pcm",
+        "stream": True
     }
     
-    # Mock streaming response
-    async def mock_stream():
-        yield b"chunk1"
-        yield b"chunk2"
-    mock_tts_service.generate_audio_stream.return_value = mock_stream()
+    # Create streaming mock for this test
+    async def mock_stream(*args, **kwargs):
+        for chunk in [b"chunk1", b"chunk2"]:
+            yield chunk
+    mock_tts_service.generate_audio_stream = mock_stream
     
     # Add streaming header
     headers = {"x-raw-response": "stream"}
@@ -198,13 +210,14 @@ async def test_openai_speech_streaming_mp3(mock_tts_service, async_client):
         "input": "Hello world",
         "voice": "af",
         "response_format": "mp3",
+        "stream": True
     }
     
-    # Mock streaming response
-    async def mock_stream():
-        yield b"mp3header"
-        yield b"mp3data"
-    mock_tts_service.generate_audio_stream.return_value = mock_stream()
+    # Create streaming mock for this test
+    async def mock_stream(*args, **kwargs):
+        for chunk in [b"mp3header", b"mp3data"]:
+            yield chunk
+    mock_tts_service.generate_audio_stream = mock_stream
     
     # Add streaming header
     headers = {"x-raw-response": "stream"}
@@ -227,14 +240,14 @@ async def test_openai_speech_streaming_generator(mock_tts_service, async_client)
         "input": "Hello world",
         "voice": "af",
         "response_format": "pcm",
+        "stream": True
     }
     
-    # Mock streaming response
-    async def mock_stream():
-        yield b"chunk1"
-        yield b"chunk2"
-    
-    mock_tts_service.generate_audio_stream.return_value = mock_stream()
+    # Create streaming mock for this test
+    async def mock_stream(*args, **kwargs):
+        for chunk in [b"chunk1", b"chunk2"]:
+            yield chunk
+    mock_tts_service.generate_audio_stream = mock_stream
     
     # Add streaming header
     headers = {"x-raw-response": "stream"}
diff --git a/api/tests/test_main.py b/api/tests/test_main.py
index 51026c5..cb7aa8b 100644
--- a/api/tests/test_main.py
+++ b/api/tests/test_main.py
@@ -28,29 +28,34 @@ async def test_lifespan_successful_warmup(mock_logger, mock_tts_model):
     """Test successful model warmup in lifespan"""
     # Mock file system for voice counting
     mock_tts_model.VOICES_DIR = "/mock/voices"
-    with patch("os.listdir", return_value=["voice1.pt", "voice2.pt", "voice3.pt"]):
-        mock_tts_model.setup.return_value = 3  # 3 voice files
-        mock_tts_model.get_device.return_value = "cuda"
-
-    # Create an async generator from the lifespan context manager
-    async_gen = lifespan(MagicMock())
-    # Start the context manager
-    await async_gen.__aenter__()
-
-    # Verify the expected logging sequence
-    mock_logger.info.assert_any_call("Loading TTS model and voice packs...")
     
-    # Check for the startup message containing the required info
-    startup_calls = [call[0][0] for call in mock_logger.info.call_args_list]
-    startup_msg = next(msg for msg in startup_calls if "Model loaded and warmed up on" in msg)
-    assert "Model loaded and warmed up on cuda" in startup_msg
-    assert "3 voice packs loaded successfully" in startup_msg
+    # Make setup() return an awaitable that resolves to the voice count (3)
+    async def async_setup():
+        return 3
+    mock_tts_model.setup = MagicMock()
+    mock_tts_model.setup.side_effect = async_setup
+    mock_tts_model.get_device.return_value = "cuda"
+    
+    with patch("os.listdir", return_value=["voice1.pt", "voice2.pt", "voice3.pt"]):
+        # Create an async generator from the lifespan context manager
+        async_gen = lifespan(MagicMock())
+        # Start the context manager
+        await async_gen.__aenter__()
 
-    # Verify model setup was called
-    mock_tts_model.setup.assert_called_once()
+        # Verify the expected logging sequence
+        mock_logger.info.assert_any_call("Loading TTS model and voice packs...")
+        
+        # Check for the startup message containing the required info
+        startup_calls = [call[0][0] for call in mock_logger.info.call_args_list]
+        startup_msg = next(msg for msg in startup_calls if "Model warmed up on" in msg)
+        assert "Model warmed up on" in startup_msg
+        assert "3 voice packs loaded" in startup_msg
 
-    # Clean up
-    await async_gen.__aexit__(None, None, None)
+        # Verify model setup was called
+        mock_tts_model.setup.assert_called_once()
+
+        # Clean up
+        await async_gen.__aexit__(None, None, None)
 
 
 @pytest.mark.asyncio
@@ -81,39 +86,21 @@ async def test_lifespan_cuda_warmup(mock_tts_model):
     """Test model warmup specifically on CUDA"""
     # Mock file system for voice counting
     mock_tts_model.VOICES_DIR = "/mock/voices"
+    
+    # Make setup() return an awaitable that resolves to the voice count (2)
+    async def async_setup():
+        return 2
+    mock_tts_model.setup = MagicMock()
+    mock_tts_model.setup.side_effect = async_setup
+    mock_tts_model.get_device.return_value = "cuda"
+    
     with patch("os.listdir", return_value=["voice1.pt", "voice2.pt"]):
-        mock_tts_model.setup.return_value = 2  # 2 voice files
-        mock_tts_model.get_device.return_value = "cuda"
+        # Create an async generator from the lifespan context manager
+        async_gen = lifespan(MagicMock())
+        await async_gen.__aenter__()
 
-    # Create an async generator from the lifespan context manager
-    async_gen = lifespan(MagicMock())
-    await async_gen.__aenter__()
+        # Verify model setup was called
+        mock_tts_model.setup.assert_called_once()
 
-    # Verify model setup was called
-    mock_tts_model.setup.assert_called_once()
-
-    # Clean up
-    await async_gen.__aexit__(None, None, None)
-
-
-@pytest.mark.asyncio
-@patch("api.src.main.TTSModel")
-async def test_lifespan_cpu_fallback(mock_tts_model):
-    """Test model warmup falling back to CPU"""
-    # Mock file system for voice counting
-    mock_tts_model.VOICES_DIR = "/mock/voices"
-    with patch(
-        "os.listdir", return_value=["voice1.pt", "voice2.pt", "voice3.pt", "voice4.pt"]
-    ):
-        mock_tts_model.setup.return_value = 4  # 4 voice files
-        mock_tts_model.get_device.return_value = "cpu"
-
-    # Create an async generator from the lifespan context manager
-    async_gen = lifespan(MagicMock())
-    await async_gen.__aenter__()
-
-    # Verify model setup was called
-    mock_tts_model.setup.assert_called_once()
-
-    # Clean up
-    await async_gen.__aexit__(None, None, None)
+        # Clean up
+        await async_gen.__aexit__(None, None, None)
diff --git a/api/tests/test_tts_implementations.py b/api/tests/test_tts_implementations.py
index 3f10c17..9e92392 100644
--- a/api/tests/test_tts_implementations.py
+++ b/api/tests/test_tts_implementations.py
@@ -16,13 +16,14 @@ def test_get_device_error():
     with pytest.raises(RuntimeError, match="Model not initialized"):
         TTSBaseModel.get_device()
 
+@pytest.mark.asyncio
 @patch('torch.cuda.is_available')
 @patch('os.path.exists')
 @patch('os.path.join')
 @patch('os.listdir')
 @patch('torch.load')
 @patch('torch.save')
-def test_setup_cuda_available(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available):
+async def test_setup_cuda_available(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available):
     """Test setup with CUDA available"""
     TTSBaseModel._device = None
     mock_cuda_available.return_value = True
@@ -36,17 +37,18 @@ def test_setup_cuda_available(mock_save, mock_load, mock_listdir, mock_join, moc
     TTSBaseModel.process_text = MagicMock(return_value=("dummy", [1,2,3]))
     TTSBaseModel.generate_from_tokens = MagicMock(return_value=np.zeros(1000))
     
-    voice_count = TTSBaseModel.setup()
+    voice_count = await TTSBaseModel.setup()
     assert TTSBaseModel._device == "cuda"
     assert voice_count == 2
 
+@pytest.mark.asyncio
 @patch('torch.cuda.is_available')
 @patch('os.path.exists')
 @patch('os.path.join')
 @patch('os.listdir')
 @patch('torch.load')
 @patch('torch.save')
-def test_setup_cuda_unavailable(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available):
+async def test_setup_cuda_unavailable(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available):
     """Test setup with CUDA unavailable"""
     TTSBaseModel._device = None
     mock_cuda_available.return_value = False
@@ -60,7 +62,7 @@ def test_setup_cuda_unavailable(mock_save, mock_load, mock_listdir, mock_join, m
     TTSBaseModel.process_text = MagicMock(return_value=("dummy", [1,2,3]))
     TTSBaseModel.generate_from_tokens = MagicMock(return_value=np.zeros(1000))
     
-    voice_count = TTSBaseModel.setup()
+    voice_count = await TTSBaseModel.setup()
     assert TTSBaseModel._device == "cpu"
     assert voice_count == 2
 
diff --git a/api/tests/test_tts_service.py b/api/tests/test_tts_service.py
index 4e63ff1..4fa2c52 100644
--- a/api/tests/test_tts_service.py
+++ b/api/tests/test_tts_service.py
@@ -31,27 +31,6 @@ def sample_audio():
     return np.sin(2 * np.pi * frequency * t).astype(np.float32)
 
 
-def test_split_text(tts_service):
-    """Test text splitting into sentences"""
-    text = "First sentence. Second sentence! Third sentence?"
-    sentences = tts_service._split_text(text)
-    assert len(sentences) == 3
-    assert sentences[0] == "First sentence."
-    assert sentences[1] == "Second sentence!"
-    assert sentences[2] == "Third sentence?"
-
-
-def test_split_text_empty(tts_service):
-    """Test splitting empty text"""
-    assert tts_service._split_text("") == []
-
-
-def test_split_text_single_sentence(tts_service):
-    """Test splitting single sentence"""
-    text = "Just one sentence."
-    assert tts_service._split_text(text) == ["Just one sentence."]
-
-
 def test_audio_to_bytes(tts_service, sample_audio):
     """Test converting audio tensor to bytes"""
     audio_bytes = tts_service._audio_to_bytes(sample_audio)
@@ -152,7 +131,7 @@ def test_generate_audio_phonemize_error(
     mock_torch_load.return_value = torch.zeros((10, 24000))
     mock_generate.return_value = (None, None)
 
-    with pytest.raises(ValueError, match="No audio chunks were generated successfully"):
+    with pytest.raises(ValueError, match="No chunks were processed successfully"):
         tts_service._generate_audio("Test text", "af", 1.0)
 
 
@@ -185,7 +164,7 @@ def test_generate_audio_error(
     mock_exists.return_value = True
     mock_torch_load.return_value = torch.zeros((10, 24000))
 
-    with pytest.raises(ValueError, match="No audio chunks were generated successfully"):
+    with pytest.raises(ValueError, match="No chunks were processed successfully"):
         tts_service._generate_audio("Test text", "af", 1.0)
 
 
diff --git a/assets/format_comparison.png b/assets/format_comparison.png
new file mode 100644
index 0000000..f1669bc
Binary files /dev/null and b/assets/format_comparison.png differ
diff --git a/assets/gpu_first_token_latency_direct.png b/assets/gpu_first_token_latency_direct.png
new file mode 100644
index 0000000..769bbbc
Binary files /dev/null and b/assets/gpu_first_token_latency_direct.png differ
diff --git a/assets/gpu_first_token_latency_openai.png b/assets/gpu_first_token_latency_openai.png
new file mode 100644
index 0000000..06f1b13
Binary files /dev/null and b/assets/gpu_first_token_latency_openai.png differ
diff --git a/assets/gpu_first_token_timeline_direct.png b/assets/gpu_first_token_timeline_direct.png
new file mode 100644
index 0000000..37ae620
Binary files /dev/null and b/assets/gpu_first_token_timeline_direct.png differ
diff --git a/assets/gpu_first_token_timeline_openai.png b/assets/gpu_first_token_timeline_openai.png
new file mode 100644
index 0000000..fa566cc
Binary files /dev/null and b/assets/gpu_first_token_timeline_openai.png differ
diff --git a/assets/gpu_processing_time.png b/assets/gpu_processing_time.png
new file mode 100644
index 0000000..334e37a
Binary files /dev/null and b/assets/gpu_processing_time.png differ
diff --git a/assets/gpu_realtime_factor.png b/assets/gpu_realtime_factor.png
new file mode 100644
index 0000000..1e55996
Binary files /dev/null and b/assets/gpu_realtime_factor.png differ
diff --git a/assets/gpu_total_time_latency_direct.png b/assets/gpu_total_time_latency_direct.png
new file mode 100644
index 0000000..a76c467
Binary files /dev/null and b/assets/gpu_total_time_latency_direct.png differ
diff --git a/assets/gpu_total_time_latency_openai.png b/assets/gpu_total_time_latency_openai.png
new file mode 100644
index 0000000..0acec6a
Binary files /dev/null and b/assets/gpu_total_time_latency_openai.png differ
diff --git a/assets/voice_analysis.png b/assets/voice_analysis.png
new file mode 100644
index 0000000..401c718
Binary files /dev/null and b/assets/voice_analysis.png differ
diff --git a/docker-compose.cpu.yml b/docker-compose.cpu.yml
index f44f2d4..8a4e3b1 100644
--- a/docker-compose.cpu.yml
+++ b/docker-compose.cpu.yml
@@ -43,6 +43,7 @@ services:
       - ONNX_OPTIMIZATION_LEVEL=all
       - ONNX_MEMORY_PATTERN=true
       - ONNX_ARENA_EXTEND_STRATEGY=kNextPowerOfTwo
+      
     depends_on:
       model-fetcher:
         condition: service_healthy
diff --git a/docker-compose.yml b/docker-compose.yml
index 16f4b4b..5b6b31b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,7 @@ services:
   model-fetcher:
     image: datamachines/git-lfs:latest
     environment:
-      - SKIP_MODEL_FETCH=${SKIP_MODEL_FETCH:-true}
+      - SKIP_MODEL_FETCH=${SKIP_MODEL_FETCH:-false}
     volumes:
       - ./Kokoro-82M:/app/Kokoro-82M
     working_dir: /app/Kokoro-82M
@@ -32,10 +32,10 @@ services:
       start_period: 1s
 
   kokoro-tts:
-    image: ghcr.io/remsky/kokoro-fastapi:latest
+    # image: ghcr.io/remsky/kokoro-fastapi:latest
     # Uncomment below to build from source instead of using the released image
-    # build:
-    #   context: .
+    build:
+      context: .
     volumes:
       - ./api/src:/app/api/src
       - ./Kokoro-82M:/app/Kokoro-82M
@@ -54,14 +54,14 @@ services:
       model-fetcher:
         condition: service_healthy
 
-  # # Gradio UI service [Comment out everything below if you don't need it]
-  # gradio-ui:
-  #   build:
-  #     context: ./ui
-  #   ports:
-  #     - "7860:7860"
-  #   volumes:
-  #     - ./ui/data:/app/ui/data
-  #     - ./ui/app.py:/app/app.py  # Mount app.py for hot reload
-  #   environment:
-  #     - GRADIO_WATCH=True  # Enable hot reloading
+  # Gradio UI service [Comment out everything below if you don't need it]
+  gradio-ui:
+    build:
+      context: ./ui
+    ports:
+      - "7860:7860"
+    volumes:
+      - ./ui/data:/app/ui/data
+      - ./ui/app.py:/app/app.py  # Mount app.py for hot reload
+    environment:
+      - GRADIO_WATCH=True  # Enable hot reloading
diff --git a/examples/assorted_checks/benchmarks/benchmark_first_token.py b/examples/assorted_checks/benchmarks/benchmark_first_token.py
index 6709876..a9e47bb 100644
--- a/examples/assorted_checks/benchmarks/benchmark_first_token.py
+++ b/examples/assorted_checks/benchmarks/benchmark_first_token.py
@@ -1,15 +1,19 @@
 #!/usr/bin/env python3
 import os
-import time
 import json
-import numpy as np
-import requests
-import pandas as pd
-from lib.shared_benchmark_utils import get_text_for_tokens, enc
-from lib.shared_utils import save_json_results
-from lib.shared_plotting import plot_correlation, plot_timeline
+import time
 
-def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int) -> dict:
+import numpy as np
+import pandas as pd
+import requests
+from lib.shared_utils import save_json_results
+from lib.shared_plotting import plot_timeline, plot_correlation
+from lib.shared_benchmark_utils import enc, get_text_for_tokens
+
+
+def measure_first_token(
+    text: str, output_dir: str, tokens: int, run_number: int
+) -> dict:
     """Measure time to audio via API calls and save the audio output"""
     results = {
         "text_length": len(text),
@@ -18,12 +22,12 @@ def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int
         "time_to_first_chunk": None,
         "error": None,
         "audio_path": None,
-        "audio_length": None  # Length of output audio in seconds
+        "audio_length": None,  # Length of output audio in seconds
     }
-    
+
     try:
         start_time = time.time()
-        
+
         # Make request without streaming
         response = requests.post(
             "http://localhost:8880/v1/audio/speech",
@@ -32,58 +36,62 @@ def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int
                 "input": text,
                 "voice": "af",
                 "response_format": "wav",
-                "stream": False
+                "stream": False,
             },
-            timeout=1800
+            timeout=1800,
         )
         response.raise_for_status()
-        
+
         # Save complete audio
         audio_filename = f"benchmark_tokens{tokens}_run{run_number}.wav"
         audio_path = os.path.join(output_dir, audio_filename)
         results["audio_path"] = audio_path
-        
+
         content = response.content
-        with open(audio_path, 'wb') as f:
+        with open(audio_path, "wb") as f:
             f.write(content)
-        
+
         # Calculate audio length using scipy
         import scipy.io.wavfile as wavfile
+
         sample_rate, audio_data = wavfile.read(audio_path)
         results["audio_length"] = len(audio_data) / sample_rate  # Length in seconds
         results["time_to_first_chunk"] = time.time() - start_time
-        
+
         results["total_time"] = time.time() - start_time
         return results
-        
+
     except Exception as e:
         results["error"] = str(e)
         return results
 
+
 def main():
     # Set up paths
     script_dir = os.path.dirname(os.path.abspath(__file__))
     output_dir = os.path.join(script_dir, "output_audio")
     output_data_dir = os.path.join(script_dir, "output_data")
-    
+
     # Create output directories
     os.makedirs(output_dir, exist_ok=True)
     os.makedirs(output_data_dir, exist_ok=True)
 
     # Load sample text
-    with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f:
+    with open(
+        os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8"
+    ) as f:
         text = f.read()
 
     # Test specific token counts
     token_sizes = [10, 25, 50, 100, 200, 500]
     all_results = []
-    
+
     for tokens in token_sizes:
         print(f"\nTesting {tokens} tokens")
         test_text = get_text_for_tokens(text, tokens)
         actual_tokens = len(enc.encode(test_text))
         print(f"Text preview: {test_text[:50]}...")
-        
+
         # Run test 3 times for each size to get average
         for i in range(5):
             print(f"Run {i+1}/3...")
@@ -91,67 +99,74 @@ def main():
             result["target_tokens"] = tokens
             result["actual_tokens"] = actual_tokens
             result["run_number"] = i + 1
-            
+
             print(f"Time to Audio: {result.get('time_to_first_chunk', 'N/A'):.3f}s")
             print(f"Total time: {result.get('total_time', 'N/A'):.3f}s")
-            
+
             if result["error"]:
                 print(f"Error: {result['error']}")
-            
+
             all_results.append(result)
-    
+
     # Calculate averages per token size
     summary = {}
     for tokens in token_sizes:
-        matching_results = [r for r in all_results if r["target_tokens"] == tokens and not r["error"]]
+        matching_results = [
+            r for r in all_results if r["target_tokens"] == tokens and not r["error"]
+        ]
         if matching_results:
-            avg_first_chunk = sum(r["time_to_first_chunk"] for r in matching_results) / len(matching_results)
-            avg_total = sum(r["total_time"] for r in matching_results) / len(matching_results)
-            avg_audio_length = sum(r["audio_length"] for r in matching_results) / len(matching_results)
+            avg_first_chunk = sum(
+                r["time_to_first_chunk"] for r in matching_results
+            ) / len(matching_results)
+            avg_total = sum(r["total_time"] for r in matching_results) / len(
+                matching_results
+            )
+            avg_audio_length = sum(r["audio_length"] for r in matching_results) / len(
+                matching_results
+            )
             summary[tokens] = {
                 "avg_time_to_first_chunk": round(avg_first_chunk, 3),
                 "avg_total_time": round(avg_total, 3),
                 "avg_audio_length": round(avg_audio_length, 3),
-                "num_successful_runs": len(matching_results)
+                "num_successful_runs": len(matching_results),
             }
-    
+
     # Save results
     # Save results
     results_data = {
         "individual_runs": all_results,
         "summary": summary,
-        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
+        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
     }
     save_json_results(
-        results_data,
-        os.path.join(output_data_dir, "first_token_benchmark.json")
+        results_data, os.path.join(output_data_dir, "first_token_benchmark.json")
     )
-    
+
     # Create plot directory if it doesn't exist
     output_plots_dir = os.path.join(script_dir, "output_plots")
     os.makedirs(output_plots_dir, exist_ok=True)
-    
+
     # Create DataFrame for plotting
     df = pd.DataFrame(all_results)
-    
+
     # Create both plots
     plot_correlation(
-        df, "target_tokens", "time_to_first_chunk",
+        df,
+        "target_tokens",
+        "time_to_first_chunk",
         "Time to Audio vs Input Size",
         "Number of Input Tokens",
         "Time to Audio (seconds)",
-        os.path.join(output_plots_dir, "first_token_latency.png")
+        os.path.join(output_plots_dir, "first_token_latency.png"),
     )
-    
-    plot_timeline(
-        df,
-        os.path.join(output_plots_dir, "first_token_timeline.png")
-    )
-    
+
+    plot_timeline(df, os.path.join(output_plots_dir, "first_token_timeline.png"))
+
     print("\nResults and plots saved to:")
     print(f"- {os.path.join(output_data_dir, 'first_token_benchmark.json')}")
     print(f"- {os.path.join(output_plots_dir, 'first_token_latency.png')}")
     print(f"- {os.path.join(output_plots_dir, 'first_token_timeline.png')}")
 
+
 if __name__ == "__main__":
     main()
diff --git a/examples/assorted_checks/benchmarks/benchmark_first_token_stream.py b/examples/assorted_checks/benchmarks/benchmark_first_token_stream.py
deleted file mode 100644
index 9e4deba..0000000
--- a/examples/assorted_checks/benchmarks/benchmark_first_token_stream.py
+++ /dev/null
@@ -1,193 +0,0 @@
-#!/usr/bin/env python3
-import os
-import time
-import json
-import numpy as np
-import requests
-import pandas as pd
-from lib.shared_benchmark_utils import get_text_for_tokens, enc
-from lib.shared_utils import save_json_results
-from lib.shared_plotting import plot_correlation, plot_timeline
-
-def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int) -> dict:
-    """Measure time to audio via API calls and save the audio output"""
-    results = {
-        "text_length": len(text),
-        "token_count": len(enc.encode(text)),
-        "total_time": None,
-        "time_to_first_chunk": None,
-        "error": None,
-        "audio_path": None,
-        "audio_length": None  # Length of output audio in seconds
-    }
-    
-    try:
-        start_time = time.time()
-        
-        # Make request with streaming enabled
-        response = requests.post(
-            "http://localhost:8880/v1/audio/speech",
-            json={
-                "model": "kokoro",
-                "input": text,
-                "voice": "af",
-                "response_format": "pcm",
-                "stream": True
-            },
-            stream=True,
-            timeout=1800
-        )
-        response.raise_for_status()
-        
-        # Save complete audio
-        audio_filename = f"benchmark_tokens{tokens}_run{run_number}_stream.wav"
-        audio_path = os.path.join(output_dir, audio_filename)
-        results["audio_path"] = audio_path
-        
-        first_chunk_time = None
-        chunks = []
-        for chunk in response.iter_content(chunk_size=1024):
-            if chunk:
-                if first_chunk_time is None:
-                    first_chunk_time = time.time()
-                    results["time_to_first_chunk"] = first_chunk_time - start_time
-                chunks.append(chunk)
-        
-        # Concatenate all PCM chunks
-        if not chunks:
-            raise ValueError("No audio chunks received")
-            
-        all_audio_data = b''.join(chunks)
-        
-        # Write as WAV file
-        import wave
-        with wave.open(audio_path, 'wb') as wav_file:
-            wav_file.setnchannels(1)  # Mono
-            wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit)
-            wav_file.setframerate(24000)  # Known sample rate for Kokoro
-            wav_file.writeframes(all_audio_data)
-        
-        # Calculate audio length using scipy
-        import scipy.io.wavfile as wavfile
-        sample_rate, audio_data = wavfile.read(audio_path)
-        results["audio_length"] = len(audio_data) / sample_rate  # Length in seconds
-        
-        results["total_time"] = time.time() - start_time
-        
-        # Print debug info
-        print(f"Complete audio size: {len(all_audio_data)} bytes")
-        print(f"Number of chunks received: {len(chunks)}")
-        print(f"Audio length: {results['audio_length']:.3f}s")
-        
-        return results
-        
-    except Exception as e:
-        results["error"] = str(e)
-        return results
-
-def main():
-    # Set up paths with _stream suffix
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    output_dir = os.path.join(script_dir, "output_audio_stream")
-    output_data_dir = os.path.join(script_dir, "output_data")
-    
-    # Create output directories
-    os.makedirs(output_dir, exist_ok=True)
-    os.makedirs(output_data_dir, exist_ok=True)
-
-    # Load sample text
-    with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f:
-        text = f.read()
-
-    # Test specific token counts
-    token_sizes = [50, 100, 200, 500, 1000, 2000, 5000, 10000]
-    all_results = []
-    
-    for tokens in token_sizes:
-        print(f"\nTesting {tokens} tokens (streaming)")
-        test_text = get_text_for_tokens(text, tokens)
-        actual_tokens = len(enc.encode(test_text))
-        print(f"Text preview: {test_text[:50]}...")
-        
-        # Run test 3 times for each size to get average
-        for i in range(5):
-            print(f"Run {i+1}/3...")
-            result = measure_first_token(test_text, output_dir, tokens, i + 1)
-            result["target_tokens"] = tokens
-            result["actual_tokens"] = actual_tokens
-            result["run_number"] = i + 1
-            
-            print(f"Time to First Audio: {result.get('time_to_first_chunk', 'N/A'):.3f}s")
-            print(f"Time to Save Complete: {result.get('total_time', 'N/A'):.3f}s")
-            print(f"Audio length: {result.get('audio_length', 'N/A'):.3f}s")
-            print(f"Streaming overhead: {(result.get('total_time', 0) - result.get('time_to_first_chunk', 0)):.3f}s")
-            
-            if result["error"]:
-                print(f"Error: {result['error']}")
-            
-            all_results.append(result)
-    
-    # Calculate averages per token size
-    summary = {}
-    for tokens in token_sizes:
-        matching_results = [r for r in all_results if r["target_tokens"] == tokens and not r["error"]]
-        if matching_results:
-            avg_first_chunk = sum(r["time_to_first_chunk"] for r in matching_results) / len(matching_results)
-            avg_total = sum(r["total_time"] for r in matching_results) / len(matching_results)
-            avg_audio_length = sum(r["audio_length"] for r in matching_results) / len(matching_results)
-            summary[tokens] = {
-                "avg_time_to_first_chunk": round(avg_first_chunk, 3),
-                "avg_total_time": round(avg_total, 3),
-                "avg_audio_length": round(avg_audio_length, 3),
-                "num_successful_runs": len(matching_results)
-            }
-    
-    # Save results with _stream suffix
-    results_data = {
-        "individual_runs": all_results,
-        "summary": summary,
-        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
-    }
-    save_json_results(
-        results_data,
-        os.path.join(output_data_dir, "first_token_benchmark_stream.json")
-    )
-    
-    # Create plot directory if it doesn't exist
-    output_plots_dir = os.path.join(script_dir, "output_plots")
-    os.makedirs(output_plots_dir, exist_ok=True)
-    
-    # Create DataFrame for plotting
-    df = pd.DataFrame(all_results)
-    
-    # Create both plots with _stream suffix
-    # Plot correlation for both metrics
-    plot_correlation(
-        df, "target_tokens", "time_to_first_chunk",
-        "Time to First Audio vs Input Size (Streaming)",
-        "Number of Input Tokens",
-        "Time to First Audio (seconds)",
-        os.path.join(output_plots_dir, "first_token_latency_stream.png")
-    )
-    
-    plot_correlation(
-        df, "target_tokens", "total_time",
-        "Total Time vs Input Size (Streaming)",
-        "Number of Input Tokens",
-        "Total Time (seconds)",
-        os.path.join(output_plots_dir, "total_time_latency_stream.png")
-    )
-    
-    plot_timeline(
-        df,
-        os.path.join(output_plots_dir, "first_token_timeline_stream.png", suffix="(Streaming)")
-    )
-    
-    print("\nResults and plots saved to:")
-    print(f"- {os.path.join(output_data_dir, 'first_token_benchmark_stream.json')}")
-    print(f"- {os.path.join(output_plots_dir, 'first_token_latency_stream.png')}")
-    print(f"- {os.path.join(output_plots_dir, 'total_time_latency_stream.png')}")
-    print(f"- {os.path.join(output_plots_dir, 'first_token_timeline_stream.png')}")
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/assorted_checks/benchmarks/benchmark_first_token_stream_openai.py b/examples/assorted_checks/benchmarks/benchmark_first_token_stream_openai.py
deleted file mode 100644
index 3adc866..0000000
--- a/examples/assorted_checks/benchmarks/benchmark_first_token_stream_openai.py
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/usr/bin/env python3
-import os
-import time
-import json
-import numpy as np
-import pandas as pd
-from openai import OpenAI
-from lib.shared_benchmark_utils import get_text_for_tokens, enc
-from lib.shared_utils import save_json_results
-from lib.shared_plotting import plot_correlation, plot_timeline
-
-def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int) -> dict:
-    """Measure time to audio via OpenAI API calls and save the audio output"""
-    results = {
-        "text_length": len(text),
-        "token_count": len(enc.encode(text)),
-        "total_time": None,
-        "time_to_first_chunk": None,
-        "error": None,
-        "audio_path": None,
-        "audio_length": None  # Length of output audio in seconds
-    }
-    
-    try:
-        start_time = time.time()
-        
-        # Initialize OpenAI client
-        openai = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed-for-local")
-        
-        # Save complete audio
-        audio_filename = f"benchmark_tokens{tokens}_run{run_number}_stream_openai.wav"
-        audio_path = os.path.join(output_dir, audio_filename)
-        results["audio_path"] = audio_path
-        
-        first_chunk_time = None
-        all_audio_data = bytearray()
-        chunk_count = 0
-        
-        # Make streaming request using OpenAI client
-        with openai.audio.speech.with_streaming_response.create(
-            model="kokoro",
-            voice="af",
-            response_format="pcm",
-            input=text,
-        ) as response:
-            for chunk in response.iter_bytes(chunk_size=1024):
-                if chunk:
-                    chunk_count += 1
-                    if first_chunk_time is None:
-                        first_chunk_time = time.time()
-                        results["time_to_first_chunk"] = first_chunk_time - start_time
-                    all_audio_data.extend(chunk)
-        
-        # Write as WAV file
-        import wave
-        with wave.open(audio_path, 'wb') as wav_file:
-            wav_file.setnchannels(1)  # Mono
-            wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit)
-            wav_file.setframerate(24000)  # Known sample rate for Kokoro
-            wav_file.writeframes(all_audio_data)
-        
-        # Calculate audio length using scipy
-        import scipy.io.wavfile as wavfile
-        sample_rate, audio_data = wavfile.read(audio_path)
-        results["audio_length"] = len(audio_data) / sample_rate  # Length in seconds
-        
-        results["total_time"] = time.time() - start_time
-        
-        # Print debug info
-        print(f"Complete audio size: {len(all_audio_data)} bytes")
-        print(f"Number of chunks received: {chunk_count}")
-        print(f"Audio length: {results['audio_length']:.3f}s")
-        
-        return results
-        
-    except Exception as e:
-        results["error"] = str(e)
-        return results
-
-def main():
-    # Set up paths with _stream_openai suffix
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    output_dir = os.path.join(script_dir, "output_audio_stream_openai")
-    output_data_dir = os.path.join(script_dir, "output_data")
-    
-    # Create output directories
-    os.makedirs(output_dir, exist_ok=True)
-    os.makedirs(output_data_dir, exist_ok=True)
-
-    # Load sample text
-    with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f:
-        text = f.read()
-
-    # Test specific token counts
-    token_sizes = [50, 100, 200, 500]
-    all_results = []
-    
-    for tokens in token_sizes:
-        print(f"\nTesting {tokens} tokens (streaming)")
-        test_text = get_text_for_tokens(text, tokens)
-        actual_tokens = len(enc.encode(test_text))
-        print(f"Text preview: {test_text[:50]}...")
-        
-        # Run test 5 times for each size to get average
-        for i in range(5):
-            print(f"Run {i+1}/5...")
-            result = measure_first_token(test_text, output_dir, tokens, i + 1)
-            result["target_tokens"] = tokens
-            result["actual_tokens"] = actual_tokens
-            result["run_number"] = i + 1
-            
-            print(f"Time to First Audio: {result.get('time_to_first_chunk', 'N/A'):.3f}s")
-            print(f"Time to Save Complete: {result.get('total_time', 'N/A'):.3f}s")
-            print(f"Audio length: {result.get('audio_length', 'N/A'):.3f}s")
-            print(f"Streaming overhead: {(result.get('total_time', 0) - result.get('time_to_first_chunk', 0)):.3f}s")
-            
-            if result["error"]:
-                print(f"Error: {result['error']}")
-            
-            all_results.append(result)
-    
-    # Calculate averages per token size
-    summary = {}
-    for tokens in token_sizes:
-        matching_results = [r for r in all_results if r["target_tokens"] == tokens and not r["error"]]
-        if matching_results:
-            avg_first_chunk = sum(r["time_to_first_chunk"] for r in matching_results) / len(matching_results)
-            avg_total = sum(r["total_time"] for r in matching_results) / len(matching_results)
-            avg_audio_length = sum(r["audio_length"] for r in matching_results) / len(matching_results)
-            summary[tokens] = {
-                "avg_time_to_first_chunk": round(avg_first_chunk, 3),
-                "avg_total_time": round(avg_total, 3),
-                "avg_audio_length": round(avg_audio_length, 3),
-                "num_successful_runs": len(matching_results)
-            }
-    
-    # Save results with _stream_openai suffix
-    results_data = {
-        "individual_runs": all_results,
-        "summary": summary,
-        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
-    }
-    save_json_results(
-        results_data,
-        os.path.join(output_data_dir, "first_token_benchmark_stream_openai.json")
-    )
-    
-    # Create plot directory if it doesn't exist
-    output_plots_dir = os.path.join(script_dir, "output_plots")
-    os.makedirs(output_plots_dir, exist_ok=True)
-    
-    # Create DataFrame for plotting
-    df = pd.DataFrame(all_results)
-    
-    # Create plots with _stream_openai suffix
-    plot_correlation(
-        df, "target_tokens", "time_to_first_chunk",
-        "Time to First Audio vs Input Size (OpenAI Streaming)",
-        "Number of Input Tokens",
-        "Time to First Audio (seconds)",
-        os.path.join(output_plots_dir, "first_token_latency_stream_openai.png")
-    )
-    
-    plot_correlation(
-        df, "target_tokens", "total_time",
-        "Total Time vs Input Size (OpenAI Streaming)",
-        "Number of Input Tokens",
-        "Total Time (seconds)",
-        os.path.join(output_plots_dir, "total_time_latency_stream_openai.png")
-    )
-    
-    plot_timeline(
-        df,
-        os.path.join(output_plots_dir, "first_token_timeline_stream_openai.png")
-    )
-    
-    print("\nResults and plots saved to:")
-    print(f"- {os.path.join(output_data_dir, 'first_token_benchmark_stream_openai.json')}")
-    print(f"- {os.path.join(output_plots_dir, 'first_token_latency_stream_openai.png')}")
-    print(f"- {os.path.join(output_plots_dir, 'total_time_latency_stream_openai.png')}")
-    print(f"- {os.path.join(output_plots_dir, 'first_token_timeline_stream_openai.png')}")
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/assorted_checks/benchmarks/benchmark_first_token_stream_unified.py b/examples/assorted_checks/benchmarks/benchmark_first_token_stream_unified.py
new file mode 100644
index 0000000..0b673ae
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/benchmark_first_token_stream_unified.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+import os
+import time
+
+import requests
+from openai import OpenAI
+from lib.stream_utils import run_benchmark
+
+OPENAI_CLIENT = OpenAI(
+    base_url="http://localhost:8880/v1", api_key="not-needed-for-local"
+)
+
+
+def measure_first_token_requests(
+    text: str, output_dir: str, tokens: int, run_number: int
+) -> dict:
+    """Measure time to audio via direct API calls and save the audio output"""
+    results = {
+        "text_length": len(text),
+        "token_count": None,  # Will be set by run_benchmark
+        "total_time": None,
+        "time_to_first_chunk": None,
+        "error": None,
+        "audio_path": None,
+        "audio_length": None,
+    }
+
+    try:
+        start_time = time.time()
+
+        # Make request with streaming enabled
+        response = requests.post(
+            "http://localhost:8880/v1/audio/speech",
+            json={
+                "model": "kokoro",
+                "input": text,
+                "voice": "af",
+                "response_format": "pcm",
+                "stream": True,
+            },
+            stream=True,
+            timeout=1800,
+        )
+        response.raise_for_status()
+
+        # Save complete audio
+        audio_filename = f"benchmark_tokens{tokens}_run{run_number}_stream.wav"
+        audio_path = os.path.join(output_dir, audio_filename)
+        results["audio_path"] = audio_path
+
+        first_chunk_time = None
+        chunks = []
+        for chunk in response.iter_content(chunk_size=1024):
+            if chunk:
+                if first_chunk_time is None:
+                    first_chunk_time = time.time()
+                    results["time_to_first_chunk"] = first_chunk_time - start_time
+                chunks.append(chunk)
+
+        # Fail if nothing was streamed, then concatenate all PCM chunks
+        if not chunks:
+            raise ValueError("No audio chunks received")
+
+        all_audio_data = b"".join(chunks)
+
+        # Write as WAV file
+        import wave
+
+        with wave.open(audio_path, "wb") as wav_file:
+            wav_file.setnchannels(1)  # Mono
+            wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit)
+            wav_file.setframerate(24000)  # Known sample rate for Kokoro
+            wav_file.writeframes(all_audio_data)
+
+        # Calculate audio length using scipy
+        import scipy.io.wavfile as wavfile
+
+        sample_rate, audio_data = wavfile.read(audio_path)
+        results["audio_length"] = len(audio_data) / sample_rate  # Length in seconds
+
+        results["total_time"] = time.time() - start_time
+
+        # Print debug info
+        print(f"Complete audio size: {len(all_audio_data)} bytes")
+        print(f"Number of chunks received: {len(chunks)}")
+        print(f"Audio length: {results['audio_length']:.3f}s")
+
+        return results
+
+    except Exception as e:
+        results["error"] = str(e)
+        return results
+
+
+def measure_first_token_openai(
+    text: str, output_dir: str, tokens: int, run_number: int
+) -> dict:
+    """Measure time to audio via OpenAI API calls and save the audio output"""
+    results = {
+        "text_length": len(text),
+        "token_count": None,  # Will be set by run_benchmark
+        "total_time": None,
+        "time_to_first_chunk": None,
+        "error": None,
+        "audio_path": None,
+        "audio_length": None,
+    }
+
+    try:
+        start_time = time.time()
+
+        # Reuse the module-level OPENAI_CLIENT instead of creating one per call
+
+        # Save complete audio
+        audio_filename = f"benchmark_tokens{tokens}_run{run_number}_stream_openai.wav"
+        audio_path = os.path.join(output_dir, audio_filename)
+        results["audio_path"] = audio_path
+
+        first_chunk_time = None
+        all_audio_data = bytearray()
+        chunk_count = 0
+
+        # Make streaming request using OpenAI client
+        with OPENAI_CLIENT.audio.speech.with_streaming_response.create(
+            model="kokoro",
+            voice="af",
+            response_format="pcm",
+            input=text,
+        ) as response:
+            for chunk in response.iter_bytes(chunk_size=1024):
+                if chunk:
+                    chunk_count += 1
+                    if first_chunk_time is None:
+                        first_chunk_time = time.time()
+                        results["time_to_first_chunk"] = first_chunk_time - start_time
+                    all_audio_data.extend(chunk)
+
+        # Write as WAV file
+        import wave
+
+        with wave.open(audio_path, "wb") as wav_file:
+            wav_file.setnchannels(1)  # Mono
+            wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit)
+            wav_file.setframerate(24000)  # Known sample rate for Kokoro
+            wav_file.writeframes(all_audio_data)
+
+        # Calculate audio length using scipy
+        import scipy.io.wavfile as wavfile
+
+        sample_rate, audio_data = wavfile.read(audio_path)
+        results["audio_length"] = len(audio_data) / sample_rate  # Length in seconds
+
+        results["total_time"] = time.time() - start_time
+
+        # Print debug info
+        print(f"Complete audio size: {len(all_audio_data)} bytes")
+        print(f"Number of chunks received: {chunk_count}")
+        print(f"Audio length: {results['audio_length']:.3f}s")
+
+        return results
+
+    except Exception as e:
+        results["error"] = str(e)
+        return results
+
+
+def main():
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    prefix='cpu'
+    # Run requests benchmark
+    print("\n=== Running Direct Requests Benchmark ===")
+    run_benchmark(
+        measure_first_token_requests,
+        output_dir=os.path.join(script_dir, "output_audio_stream"),
+        output_data_dir=os.path.join(script_dir, "output_data"),
+        output_plots_dir=os.path.join(script_dir, "output_plots"),
+        suffix="_stream",
+        plot_title_suffix="(Streaming)",
+        prefix=prefix
+    )
+    # Run OpenAI benchmark
+    print("\n=== Running OpenAI Library Benchmark ===")
+    run_benchmark(
+        measure_first_token_openai,
+        output_dir=os.path.join(script_dir, "output_audio_stream_openai"),
+        output_data_dir=os.path.join(script_dir, "output_data"),
+        output_plots_dir=os.path.join(script_dir, "output_plots"),
+        suffix="_stream_openai",
+        plot_title_suffix="(OpenAI Streaming)",
+        prefix=prefix
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py b/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py
index 385abb0..7e4940d 100644
--- a/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py
+++ b/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py
@@ -1,30 +1,37 @@
 #!/usr/bin/env python3
 import os
+import sys
 import json
 import time
-import threading
 import queue
-import pandas as pd
-import sys
+import threading
 from datetime import datetime
 
-from lib.shared_plotting import plot_system_metrics, plot_correlation
+import pandas as pd
 from lib.shared_utils import (
-    get_system_metrics, save_json_results, write_benchmark_stats,
-    real_time_factor
+    real_time_factor,
+    save_json_results,
+    get_system_metrics,
+    write_benchmark_stats,
 )
+from lib.shared_plotting import plot_correlation, plot_system_metrics
 from lib.shared_benchmark_utils import (
-    get_text_for_tokens, make_tts_request, generate_token_sizes, enc
+    enc,
+    make_tts_request,
+    get_text_for_tokens,
+    generate_token_sizes,
 )
 
+
 class SystemMonitor:
     def __init__(self, interval=1.0):
+        """Set up a rough usage tracker polled every `interval` seconds (approximate)."""
         self.interval = interval
         self.metrics_queue = queue.Queue()
         self.stop_event = threading.Event()
         self.metrics_timeline = []
         self.start_time = None
-        
+
     def _monitor_loop(self):
         """Background thread function to collect system metrics."""
         while not self.stop_event.is_set():
@@ -32,20 +39,20 @@ class SystemMonitor:
             metrics["relative_time"] = time.time() - self.start_time
             self.metrics_queue.put(metrics)
             time.sleep(self.interval)
-    
+
     def start(self):
         """Start the monitoring thread."""
         self.start_time = time.time()
         self.monitor_thread = threading.Thread(target=self._monitor_loop)
         self.monitor_thread.daemon = True
         self.monitor_thread.start()
-    
+
     def stop(self):
         """Stop the monitoring thread and collect final metrics."""
         self.stop_event.set()
-        if hasattr(self, 'monitor_thread'):
+        if hasattr(self, "monitor_thread"):
             self.monitor_thread.join(timeout=2)
-        
+
         # Collect all metrics from queue
         while True:
             try:
@@ -53,23 +60,24 @@ class SystemMonitor:
                 self.metrics_timeline.append(metrics)
             except queue.Empty:
                 break
-        
+
         return self.metrics_timeline
 
+
 def main():
     # Initialize system monitor
     monitor = SystemMonitor(interval=1.0)  # 1 second interval
     # Set prefix for output files (e.g. "gpu", "cpu", "onnx", etc.)
-    prefix = "gpu"
+    prefix = "cpu"
     # Generate token sizes
-    if 'gpu' in prefix:
+    if "gpu" in prefix:
         token_sizes = generate_token_sizes(
-            max_tokens=5000, dense_step=150, 
-            dense_max=1000, sparse_step=1000)
-    elif 'cpu' in prefix:
+            max_tokens=1000, dense_step=150, dense_max=1000, sparse_step=1000
+        )
+    elif "cpu" in prefix:
         token_sizes = generate_token_sizes(
-            max_tokens=1000, dense_step=300, 
-            dense_max=1000, sparse_step=0)
+            max_tokens=1000, dense_step=100, dense_max=500, sparse_step=250
+        )
     else:
         token_sizes = generate_token_sizes(max_tokens=3000)
 
@@ -78,7 +86,7 @@ def main():
     output_dir = os.path.join(script_dir, "output_audio")
     output_data_dir = os.path.join(script_dir, "output_data")
     output_plots_dir = os.path.join(script_dir, "output_plots")
-    
+
     # Create output directories
     os.makedirs(output_dir, exist_ok=True)
     os.makedirs(output_data_dir, exist_ok=True)
@@ -90,7 +98,9 @@ def main():
             filename = f"{prefix}_{filename}"
         return os.path.join(path, filename)
 
-    with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f:
+    with open(
+        os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8"
+    ) as f:
         text = f.read()
 
     total_tokens = len(enc.encode(text))
@@ -100,7 +110,7 @@ def main():
 
     results = []
     test_start_time = time.time()
-    
+
     # Start system monitoring
     monitor.start()
 
@@ -114,7 +124,8 @@ def main():
         processing_time, audio_length = make_tts_request(
             chunk,
             output_dir=output_dir,
-            prefix=prefix
+            prefix=prefix,
+            stream=False,  # Use non-streaming mode for RTF benchmarking
         )
         if processing_time is None or audio_length is None:
             print("Breaking loop due to error")
@@ -123,14 +134,16 @@ def main():
         # Calculate RTF using the correct formula
         rtf = real_time_factor(processing_time, audio_length)
         print(f"Real-Time Factor: {rtf:.5f}")
-        
-        results.append({
-            "tokens": actual_tokens,
-            "processing_time": processing_time,
-            "output_length": audio_length,
-            "rtf": rtf,
-            "elapsed_time": round(time.time() - test_start_time, 2),
-        })
+
+        results.append(
+            {
+                "tokens": actual_tokens,
+                "processing_time": processing_time,
+                "output_length": audio_length,
+                "rtf": rtf,
+                "elapsed_time": round(time.time() - test_start_time, 5),
+            }
+        )
 
     df = pd.DataFrame(results)
     if df.empty:
@@ -144,89 +157,101 @@ def main():
         {
             "title": "Benchmark Statistics (with correct RTF)",
             "stats": {
-                "Total tokens processed": df['tokens'].sum(),
-                "Total audio generated (s)": df['output_length'].sum(),
-                "Total test duration (s)": df['elapsed_time'].max(),
-                "Average processing rate (tokens/s)": df['tokens_per_second'].mean(),
-                "Average RTF": df['rtf'].mean(),
-                "Average Real Time Speed": 1/df['rtf'].mean()
-            }
+                "Total tokens processed": df["tokens"].sum(),
+                "Total audio generated (s)": df["output_length"].sum(),
+                "Total test duration (s)": df["elapsed_time"].max(),
+                "Average processing rate (tokens/s)": df["tokens_per_second"].mean(),
+                "Average RTF": df["rtf"].mean(),
+                "Average Real Time Speed": 1 / df["rtf"].mean(),
+            },
         },
         {
             "title": "Per-chunk Stats",
             "stats": {
-                "Average chunk size (tokens)": df['tokens'].mean(),
-                "Min chunk size (tokens)": df['tokens'].min(),
-                "Max chunk size (tokens)": df['tokens'].max(),
-                "Average processing time (s)": df['processing_time'].mean(),
-                "Average output length (s)": df['output_length'].mean()
-            }
+                "Average chunk size (tokens)": df["tokens"].mean(),
+                "Min chunk size (tokens)": df["tokens"].min(),
+                "Max chunk size (tokens)": df["tokens"].max(),
+                "Average processing time (s)": df["processing_time"].mean(),
+                "Average output length (s)": df["output_length"].mean(),
+            },
         },
         {
             "title": "Performance Ranges",
             "stats": {
                 "Processing rate range (tokens/s)": f"{df['tokens_per_second'].min():.2f} - {df['tokens_per_second'].max():.2f}",
                 "RTF range": f"{df['rtf'].min():.2f}x - {df['rtf'].max():.2f}x",
-                "Real Time Speed range": f"{1/df['rtf'].max():.2f}x - {1/df['rtf'].min():.2f}x"
-            }
-        }
+                "Real Time Speed range": f"{1/df['rtf'].max():.2f}x - {1/df['rtf'].min():.2f}x",
+            },
+        },
     ]
-    write_benchmark_stats(stats, prefix_path(output_data_dir, "benchmark_stats_rtf.txt"))
+    write_benchmark_stats(
+        stats, prefix_path(output_data_dir, "benchmark_stats_rtf.txt")
+    )
 
     # Plot Processing Time vs Token Count
     plot_correlation(
-        df, "tokens", "processing_time",
+        df,
+        "tokens",
+        "processing_time",
         "Processing Time vs Input Size",
         "Number of Input Tokens",
         "Processing Time (seconds)",
-        prefix_path(output_plots_dir, "processing_time_rtf.png")
+        prefix_path(output_plots_dir, "processing_time_rtf.png"),
     )
 
     # Plot RTF vs Token Count
     plot_correlation(
-        df, "tokens", "rtf",
+        df,
+        "tokens",
+        "rtf",
         "Real-Time Factor vs Input Size",
         "Number of Input Tokens",
         "Real-Time Factor (processing time / audio length)",
-        prefix_path(output_plots_dir, "realtime_factor_rtf.png")
+        prefix_path(output_plots_dir, "realtime_factor_rtf.png"),
     )
 
     # Stop monitoring and get final metrics
     final_metrics = monitor.stop()
-    
+
     # Convert metrics timeline to DataFrame for stats
     metrics_df = pd.DataFrame(final_metrics)
-    
+
     # Add system usage stats
     if not metrics_df.empty:
-        stats.append({
-            "title": "System Usage Statistics",
-            "stats": {
-                "Peak CPU Usage (%)": metrics_df['cpu_percent'].max(),
-                "Avg CPU Usage (%)": metrics_df['cpu_percent'].mean(),
-                "Peak RAM Usage (%)": metrics_df['ram_percent'].max(),
-                "Avg RAM Usage (%)": metrics_df['ram_percent'].mean(),
-                "Peak RAM Used (GB)": metrics_df['ram_used_gb'].max(),
-                "Avg RAM Used (GB)": metrics_df['ram_used_gb'].mean(),
+        stats.append(
+            {
+                "title": "System Usage Statistics",
+                "stats": {
+                    "Peak CPU Usage (%)": metrics_df["cpu_percent"].max(),
+                    "Avg CPU Usage (%)": metrics_df["cpu_percent"].mean(),
+                    "Peak RAM Usage (%)": metrics_df["ram_percent"].max(),
+                    "Avg RAM Usage (%)": metrics_df["ram_percent"].mean(),
+                    "Peak RAM Used (GB)": metrics_df["ram_used_gb"].max(),
+                    "Avg RAM Used (GB)": metrics_df["ram_used_gb"].mean(),
+                },
             }
-        })
-        if 'gpu_memory_used' in metrics_df:
-            stats[-1]["stats"].update({
-                "Peak GPU Memory (MB)": metrics_df['gpu_memory_used'].max(),
-                "Avg GPU Memory (MB)": metrics_df['gpu_memory_used'].mean(),
-            })
-    
+        )
+        if "gpu_memory_used" in metrics_df:
+            stats[-1]["stats"].update(
+                {
+                    "Peak GPU Memory (MB)": metrics_df["gpu_memory_used"].max(),
+                    "Avg GPU Memory (MB)": metrics_df["gpu_memory_used"].mean(),
+                }
+            )
+
     # Plot system metrics
-    plot_system_metrics(final_metrics, prefix_path(output_plots_dir, "system_usage_rtf.png"))
+    plot_system_metrics(
+        final_metrics, prefix_path(output_plots_dir, "system_usage_rtf.png")
+    )
 
     # Save final results
     save_json_results(
         {
             "results": results,
             "system_metrics": final_metrics,
-            "test_duration": time.time() - test_start_time
+            "test_duration": time.time() - test_start_time,
         },
-        prefix_path(output_data_dir, "benchmark_results_rtf.json")
+        prefix_path(output_data_dir, "benchmark_results_rtf.json"),
     )
 
     print("\nResults saved to:")
diff --git a/examples/assorted_checks/benchmarks/depr_benchmark_tts.py b/examples/assorted_checks/benchmarks/depr_benchmark_tts.py
index 989c177..6fd600e 100644
--- a/examples/assorted_checks/benchmarks/depr_benchmark_tts.py
+++ b/examples/assorted_checks/benchmarks/depr_benchmark_tts.py
@@ -1,19 +1,30 @@
 import os
 import json
 import time
+
 import pandas as pd
-from examples.assorted_checks.lib.shared_plotting import plot_system_metrics, plot_correlation
+
 from examples.assorted_checks.lib.shared_utils import (
-    get_system_metrics, save_json_results, write_benchmark_stats
+    save_json_results,
+    get_system_metrics,
+    write_benchmark_stats,
+)
+from examples.assorted_checks.lib.shared_plotting import (
+    plot_correlation,
+    plot_system_metrics,
 )
 from examples.assorted_checks.lib.shared_benchmark_utils import (
-    get_text_for_tokens, make_tts_request, generate_token_sizes, enc
+    enc,
+    make_tts_request,
+    get_text_for_tokens,
+    generate_token_sizes,
 )
 
 
 def main():
     # Get optional prefix from first command line argument
     import sys
+
     prefix = sys.argv[1] if len(sys.argv) > 1 else ""
 
     # Set up paths relative to this file
@@ -21,7 +32,7 @@ def main():
     output_dir = os.path.join(script_dir, "output_audio")
     output_data_dir = os.path.join(script_dir, "output_data")
     output_plots_dir = os.path.join(script_dir, "output_plots")
-    
+
     # Create output directories
     os.makedirs(output_dir, exist_ok=True)
     os.makedirs(output_data_dir, exist_ok=True)
@@ -43,7 +54,6 @@ def main():
     total_tokens = len(enc.encode(text))
     print(f"Total tokens in file: {total_tokens}")
 
-
     token_sizes = generate_token_sizes(total_tokens)
 
     print(f"Testing sizes: {token_sizes}")
@@ -85,7 +95,7 @@ def main():
         # Save intermediate results
         save_json_results(
             {"results": results, "system_metrics": system_metrics},
-            prefix_path(output_data_dir, "benchmark_results.json")
+            prefix_path(output_data_dir, "benchmark_results.json"),
         )
 
     # Create DataFrame and calculate stats
@@ -102,53 +112,59 @@ def main():
         {
             "title": "Benchmark Statistics",
             "stats": {
-                "Total tokens processed": df['tokens'].sum(),
-                "Total audio generated (s)": df['output_length'].sum(),
-                "Total test duration (s)": df['elapsed_time'].max(),
-                "Average processing rate (tokens/s)": df['tokens_per_second'].mean(),
-                "Average realtime factor": df['realtime_factor'].mean()
-            }
+                "Total tokens processed": df["tokens"].sum(),
+                "Total audio generated (s)": df["output_length"].sum(),
+                "Total test duration (s)": df["elapsed_time"].max(),
+                "Average processing rate (tokens/s)": df["tokens_per_second"].mean(),
+                "Average realtime factor": df["realtime_factor"].mean(),
+            },
         },
         {
             "title": "Per-chunk Stats",
             "stats": {
-                "Average chunk size (tokens)": df['tokens'].mean(),
-                "Min chunk size (tokens)": df['tokens'].min(),
-                "Max chunk size (tokens)": df['tokens'].max(),
-                "Average processing time (s)": df['processing_time'].mean(),
-                "Average output length (s)": df['output_length'].mean()
-            }
+                "Average chunk size (tokens)": df["tokens"].mean(),
+                "Min chunk size (tokens)": df["tokens"].min(),
+                "Max chunk size (tokens)": df["tokens"].max(),
+                "Average processing time (s)": df["processing_time"].mean(),
+                "Average output length (s)": df["output_length"].mean(),
+            },
         },
         {
             "title": "Performance Ranges",
             "stats": {
                 "Processing rate range (tokens/s)": f"{df['tokens_per_second'].min():.2f} - {df['tokens_per_second'].max():.2f}",
-                "Realtime factor range": f"{df['realtime_factor'].min():.2f}x - {df['realtime_factor'].max():.2f}x"
-            }
-        }
+                "Realtime factor range": f"{df['realtime_factor'].min():.2f}x - {df['realtime_factor'].max():.2f}x",
+            },
+        },
     ]
     write_benchmark_stats(stats, prefix_path(output_data_dir, "benchmark_stats.txt"))
 
     # Plot Processing Time vs Token Count
     plot_correlation(
-        df, "tokens", "processing_time",
+        df,
+        "tokens",
+        "processing_time",
         "Processing Time vs Input Size",
         "Number of Input Tokens",
         "Processing Time (seconds)",
-        prefix_path(output_plots_dir, "processing_time.png")
+        prefix_path(output_plots_dir, "processing_time.png"),
     )
 
     # Plot Realtime Factor vs Token Count
     plot_correlation(
-        df, "tokens", "realtime_factor",
+        df,
+        "tokens",
+        "realtime_factor",
         "Realtime Factor vs Input Size",
         "Number of Input Tokens",
         "Realtime Factor (output length / processing time)",
-        prefix_path(output_plots_dir, "realtime_factor.png")
+        prefix_path(output_plots_dir, "realtime_factor.png"),
     )
 
     # Plot system metrics
-    plot_system_metrics(system_metrics, prefix_path(output_plots_dir, "system_usage.png"))
+    plot_system_metrics(
+        system_metrics, prefix_path(output_plots_dir, "system_usage.png")
+    )
 
     print("\nResults saved to:")
     print(f"- {prefix_path(output_data_dir, 'benchmark_results.json')}")
diff --git a/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py b/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py
index c2fd1c4..f44f7eb 100644
--- a/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py
+++ b/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py
@@ -1,11 +1,12 @@
 """Shared utilities specific to TTS benchmarking."""
+
 import time
-from typing import List, Optional, Tuple
+from typing import List, Tuple, Optional
 
 import requests
 import tiktoken
 
-from .shared_utils import get_audio_length, save_audio_file
+from .shared_utils import save_audio_file, get_audio_length
 
 # Global tokenizer instance
 enc = tiktoken.get_encoding("cl100k_base")
@@ -13,11 +14,11 @@ enc = tiktoken.get_encoding("cl100k_base")
 
 def get_text_for_tokens(text: str, num_tokens: int) -> str:
     """Get a slice of text that contains exactly num_tokens tokens.
-    
+
     Args:
         text: Input text to slice
         num_tokens: Desired number of tokens
-        
+
     Returns:
         str: Text slice containing exactly num_tokens tokens
     """
@@ -31,44 +32,69 @@ def make_tts_request(
     text: str,
     output_dir: str = None,
     timeout: int = 1800,
-    prefix: str = ""
+    prefix: str = "",
+    stream: bool = True,
 ) -> Tuple[Optional[float], Optional[float]]:
     """Make TTS request using OpenAI-compatible endpoint.
-    
+
     Args:
         text: Input text to convert to speech
         output_dir: Directory to save audio files. If None, audio won't be saved.
         timeout: Request timeout in seconds
         prefix: Optional prefix for output filenames
-        
+
     Returns:
         tuple: (processing_time, audio_length) in seconds, or (None, None) on error
     """
     try:
         start_time = time.time()
-        response = requests.post(
-            "http://localhost:8880/v1/audio/speech",
-            json={
-                "model": "kokoro",
-                "input": text,
-                "voice": "af",
-                "response_format": "wav",
-            },
-            timeout=timeout,
-        )
-        response.raise_for_status()
+        if stream:
+            # For streaming, we need to collect all chunks
+            audio_chunks = []
+            response = requests.post(
+                "http://localhost:8880/v1/audio/speech",
+                json={
+                    "model": "kokoro",
+                    "input": text,
+                    "voice": "af",
+                    "response_format": "wav",
+                    "stream": True,
+                },
+                timeout=timeout,
+                stream=True,
+            )
+            response.raise_for_status()
+
+            for chunk in response.iter_content(chunk_size=8192):
+                if chunk:
+                    audio_chunks.append(chunk)
+
+            # Combine all chunks
+            audio_data = b"".join(audio_chunks)
+        else:
+            response = requests.post(
+                "http://localhost:8880/v1/audio/speech",
+                json={
+                    "model": "kokoro",
+                    "input": text,
+                    "voice": "af",
+                    "response_format": "wav",
+                    "stream": False,
+                },
+                timeout=timeout,
+            )
+            response.raise_for_status()
+            audio_data = response.content
 
         processing_time = round(time.time() - start_time, 2)
-        # Calculate audio length from response content
-        audio_length = get_audio_length(response.content)
-        
+        # Calculate audio length from audio data
+        audio_length = get_audio_length(audio_data)
+
         # Save the audio file if output_dir is provided
         if output_dir:
             token_count = len(enc.encode(text))
             output_file = save_audio_file(
-                response.content,
-                f"chunk_{token_count}_tokens",
-                output_dir
+                audio_data, f"chunk_{token_count}_tokens", output_dir
             )
             print(f"Saved audio to {output_file}")
 
@@ -86,26 +112,26 @@ def generate_token_sizes(
     max_tokens: int,
     dense_step: int = 100,
     dense_max: int = 1000,
-    sparse_step: int = 1000
+    sparse_step: int = 1000,
 ) -> List[int]:
     """Generate token size ranges with dense sampling at start.
-    
+
     Args:
         max_tokens: Maximum number of tokens to generate sizes up to
         dense_step: Step size for dense sampling range
         dense_max: Maximum value for dense sampling
         sparse_step: Step size for sparse sampling range
-        
+
     Returns:
         list: Sorted list of token sizes
     """
     # Dense sampling at start
     dense_range = list(range(dense_step, dense_max + 1, dense_step))
-    
+
     if max_tokens <= dense_max or sparse_step < dense_max:
         return sorted(dense_range)
     # Sparse sampling for larger sizes
     sparse_range = list(range(dense_max + sparse_step, max_tokens + 1, sparse_step))
-    
+
     # Combine and deduplicate
     return sorted(list(set(dense_range + sparse_range)))
diff --git a/examples/assorted_checks/benchmarks/lib/shared_plotting.py b/examples/assorted_checks/benchmarks/lib/shared_plotting.py
index 18e6e68..6ca4872 100644
--- a/examples/assorted_checks/benchmarks/lib/shared_plotting.py
+++ b/examples/assorted_checks/benchmarks/lib/shared_plotting.py
@@ -1,7 +1,8 @@
 """Shared plotting utilities for benchmarks and tests."""
+
+import numpy as np
 import pandas as pd
 import seaborn as sns
-import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib.patches as patches
 
@@ -12,66 +13,71 @@ STYLE_CONFIG = {
     "secondary_color": "#05d9e8",
     "grid_color": "#ffffff",
     "text_color": "#ffffff",
-    "font_sizes": {
-        "title": 16,
-        "label": 14,
-        "tick": 12,
-        "text": 10
-    }
+    "font_sizes": {"title": 16, "label": 14, "tick": 12, "text": 10},
 }
 
+
 def setup_plot(fig, ax, title, xlabel=None, ylabel=None):
     """Configure plot styling with consistent theme.
-    
+
     Args:
         fig: matplotlib figure object
         ax: matplotlib axis object
         title: str, plot title
         xlabel: str, optional x-axis label
         ylabel: str, optional y-axis label
-    
+
     Returns:
         tuple: (fig, ax) with applied styling
     """
     # Grid styling
     ax.grid(True, linestyle="--", alpha=0.3, color=STYLE_CONFIG["grid_color"])
-    
+
     # Title and labels
-    ax.set_title(title, pad=20, 
-                fontsize=STYLE_CONFIG["font_sizes"]["title"], 
-                fontweight="bold", 
-                color=STYLE_CONFIG["text_color"])
-    
+    ax.set_title(
+        title,
+        pad=20,
+        fontsize=STYLE_CONFIG["font_sizes"]["title"],
+        fontweight="bold",
+        color=STYLE_CONFIG["text_color"],
+    )
+
     if xlabel:
-        ax.set_xlabel(xlabel, 
-                     fontsize=STYLE_CONFIG["font_sizes"]["label"], 
-                     fontweight="medium", 
-                     color=STYLE_CONFIG["text_color"])
+        ax.set_xlabel(
+            xlabel,
+            fontsize=STYLE_CONFIG["font_sizes"]["label"],
+            fontweight="medium",
+            color=STYLE_CONFIG["text_color"],
+        )
     if ylabel:
-        ax.set_ylabel(ylabel, 
-                     fontsize=STYLE_CONFIG["font_sizes"]["label"], 
-                     fontweight="medium", 
-                     color=STYLE_CONFIG["text_color"])
-    
+        ax.set_ylabel(
+            ylabel,
+            fontsize=STYLE_CONFIG["font_sizes"]["label"],
+            fontweight="medium",
+            color=STYLE_CONFIG["text_color"],
+        )
+
     # Tick styling
-    ax.tick_params(labelsize=STYLE_CONFIG["font_sizes"]["tick"], 
-                  colors=STYLE_CONFIG["text_color"])
-    
+    ax.tick_params(
+        labelsize=STYLE_CONFIG["font_sizes"]["tick"], colors=STYLE_CONFIG["text_color"]
+    )
+
     # Spine styling
     for spine in ax.spines.values():
         spine.set_color(STYLE_CONFIG["text_color"])
         spine.set_alpha(0.3)
         spine.set_linewidth(0.5)
-    
+
     # Background colors
     ax.set_facecolor(STYLE_CONFIG["background_color"])
     fig.patch.set_facecolor(STYLE_CONFIG["background_color"])
-    
+
     return fig, ax
 
+
 def plot_system_metrics(metrics_data, output_path):
     """Create plots for system metrics over time.
-    
+
     Args:
         metrics_data: list of dicts containing system metrics
         output_path: str, path to save the output plot
@@ -79,68 +85,118 @@ def plot_system_metrics(metrics_data, output_path):
     df = pd.DataFrame(metrics_data)
     df["timestamp"] = pd.to_datetime(df["timestamp"])
     elapsed_time = (df["timestamp"] - df["timestamp"].iloc[0]).dt.total_seconds()
-    
+
     # Get baseline values
     baseline_cpu = df["cpu_percent"].iloc[0]
     baseline_ram = df["ram_used_gb"].iloc[0]
-    baseline_gpu = df["gpu_memory_used"].iloc[0] / 1024 if "gpu_memory_used" in df.columns else None
-    
+    baseline_gpu = (
+        df["gpu_memory_used"].iloc[0] / 1024
+        if "gpu_memory_used" in df.columns
+        else None
+    )
+
     # Convert GPU memory to GB if present
     if "gpu_memory_used" in df.columns:
         df["gpu_memory_gb"] = df["gpu_memory_used"] / 1024
-    
+
     plt.style.use("dark_background")
-    
+
     # Create subplots based on available metrics
     has_gpu = "gpu_memory_used" in df.columns
     num_plots = 3 if has_gpu else 2
     fig, axes = plt.subplots(num_plots, 1, figsize=(15, 5 * num_plots))
     fig.patch.set_facecolor(STYLE_CONFIG["background_color"])
-    
+
     # Smoothing window
     window = min(5, len(df) // 2)
-    
+
     # Plot CPU Usage
     smoothed_cpu = df["cpu_percent"].rolling(window=window, center=True).mean()
-    sns.lineplot(x=elapsed_time, y=smoothed_cpu, ax=axes[0], 
-                color=STYLE_CONFIG["primary_color"], linewidth=2)
-    axes[0].axhline(y=baseline_cpu, color=STYLE_CONFIG["secondary_color"], 
-                    linestyle="--", alpha=0.5, label="Baseline")
-    setup_plot(fig, axes[0], "CPU Usage Over Time", 
-              xlabel="Time (seconds)", ylabel="CPU Usage (%)")
+    sns.lineplot(
+        x=elapsed_time,
+        y=smoothed_cpu,
+        ax=axes[0],
+        color=STYLE_CONFIG["primary_color"],
+        linewidth=2,
+    )
+    axes[0].axhline(
+        y=baseline_cpu,
+        color=STYLE_CONFIG["secondary_color"],
+        linestyle="--",
+        alpha=0.5,
+        label="Baseline",
+    )
+    setup_plot(
+        fig,
+        axes[0],
+        "CPU Usage Over Time",
+        xlabel="Time (seconds)",
+        ylabel="CPU Usage (%)",
+    )
     axes[0].set_ylim(0, max(df["cpu_percent"]) * 1.1)
     axes[0].legend()
-    
+
     # Plot RAM Usage
     smoothed_ram = df["ram_used_gb"].rolling(window=window, center=True).mean()
-    sns.lineplot(x=elapsed_time, y=smoothed_ram, ax=axes[1], 
-                color=STYLE_CONFIG["secondary_color"], linewidth=2)
-    axes[1].axhline(y=baseline_ram, color=STYLE_CONFIG["primary_color"], 
-                    linestyle="--", alpha=0.5, label="Baseline")
-    setup_plot(fig, axes[1], "RAM Usage Over Time", 
-              xlabel="Time (seconds)", ylabel="RAM Usage (GB)")
+    sns.lineplot(
+        x=elapsed_time,
+        y=smoothed_ram,
+        ax=axes[1],
+        color=STYLE_CONFIG["secondary_color"],
+        linewidth=2,
+    )
+    axes[1].axhline(
+        y=baseline_ram,
+        color=STYLE_CONFIG["primary_color"],
+        linestyle="--",
+        alpha=0.5,
+        label="Baseline",
+    )
+    setup_plot(
+        fig,
+        axes[1],
+        "RAM Usage Over Time",
+        xlabel="Time (seconds)",
+        ylabel="RAM Usage (GB)",
+    )
     axes[1].set_ylim(0, max(df["ram_used_gb"]) * 1.1)
     axes[1].legend()
-    
+
     # Plot GPU Memory if available
     if has_gpu:
         smoothed_gpu = df["gpu_memory_gb"].rolling(window=window, center=True).mean()
-        sns.lineplot(x=elapsed_time, y=smoothed_gpu, ax=axes[2], 
-                    color=STYLE_CONFIG["primary_color"], linewidth=2)
-        axes[2].axhline(y=baseline_gpu, color=STYLE_CONFIG["secondary_color"], 
-                        linestyle="--", alpha=0.5, label="Baseline")
-        setup_plot(fig, axes[2], "GPU Memory Usage Over Time", 
-                  xlabel="Time (seconds)", ylabel="GPU Memory (GB)")
+        sns.lineplot(
+            x=elapsed_time,
+            y=smoothed_gpu,
+            ax=axes[2],
+            color=STYLE_CONFIG["primary_color"],
+            linewidth=2,
+        )
+        axes[2].axhline(
+            y=baseline_gpu,
+            color=STYLE_CONFIG["secondary_color"],
+            linestyle="--",
+            alpha=0.5,
+            label="Baseline",
+        )
+        setup_plot(
+            fig,
+            axes[2],
+            "GPU Memory Usage Over Time",
+            xlabel="Time (seconds)",
+            ylabel="GPU Memory (GB)",
+        )
         axes[2].set_ylim(0, max(df["gpu_memory_gb"]) * 1.1)
         axes[2].legend()
-    
+
     plt.tight_layout()
     plt.savefig(output_path, dpi=300, bbox_inches="tight")
     plt.close()
 
-def plot_timeline(df, output_path, suffix=""):
+
+def plot_timeline(df, output_path, suffix="", prefix=""):
     """Create timeline plot showing latency for each run.
-    
+
     Args:
         df: pandas DataFrame containing run data with columns:
             - target_tokens: number of tokens
@@ -149,124 +205,161 @@ def plot_timeline(df, output_path, suffix=""):
         output_path: str, path to save the output plot
     """
     plt.style.use("dark_background")
-    
+
     # Sort by tokens and run number
-    df = df.sort_values(['target_tokens', 'run_number'])
-    
+    df = df.sort_values(["target_tokens", "run_number"])
+
     # Create figure and axis
     fig, ax = plt.subplots(figsize=(12, 6))
-    
+
     # Calculate y positions for each run with tighter grouping
-    unique_tokens = sorted(df['target_tokens'].unique())
+    unique_tokens = sorted(df["target_tokens"].unique())
     y_positions = {}
     current_y = 0
     group_spacing = 0.8  # Space between groups
-    run_spacing = 0.2    # Space between runs in a group
-    
+    run_spacing = 0.2  # Space between runs in a group
+
     for tokens in unique_tokens:
-        runs = df[df['target_tokens'] == tokens]
+        runs = df[df["target_tokens"] == tokens]
         base_y = current_y
         for i, (_, run) in enumerate(runs.iterrows()):
-            y_positions[(tokens, run['run_number'])] = base_y + (i * run_spacing)
+            y_positions[(tokens, run["run_number"])] = base_y + (i * run_spacing)
         current_y = base_y + (len(runs) * run_spacing) + group_spacing
-    
+
     # Plot bars and points with more transparency
     bar_height = 0.15
     for _, row in df.iterrows():
-        y = y_positions[(row['target_tokens'], row['run_number'])]
-        latency = row['time_to_first_chunk']
-        
+        y = y_positions[(row["target_tokens"], row["run_number"])]
+        latency = row["time_to_first_chunk"]
+
         # Latency bar
-        ax.add_patch(patches.Rectangle(
-            (0, y - bar_height/2),
-            latency,
-            bar_height,
-            facecolor=STYLE_CONFIG["primary_color"],
-            alpha=0.3
-        ))
-        
+        ax.add_patch(
+            patches.Rectangle(
+                (0, y - bar_height / 2),
+                latency,
+                bar_height,
+                facecolor=STYLE_CONFIG["primary_color"],
+                alpha=0.3,
+            )
+        )
+
         # End point
-        ax.plot(latency, y, 'o', 
-                color=STYLE_CONFIG["secondary_color"],
-                markersize=4,
-                alpha=0.5)
-    
+        ax.plot(
+            latency,
+            y,
+            "o",
+            color=STYLE_CONFIG["secondary_color"],
+            markersize=4,
+            alpha=0.5,
+        )
+
     # Add mean lines and values for each token group
     for tokens in unique_tokens:
-        token_runs = df[df['target_tokens'] == tokens]
-        mean_latency = token_runs['time_to_first_chunk'].mean()
-        y_positions_for_token = [y_positions[(tokens, run['run_number'])] for _, run in token_runs.iterrows()]
+        token_runs = df[df["target_tokens"] == tokens]
+        mean_latency = token_runs["time_to_first_chunk"].mean()
+        y_positions_for_token = [
+            y_positions[(tokens, run["run_number"])] for _, run in token_runs.iterrows()
+        ]
         min_y = min(y_positions_for_token)
         max_y = max(y_positions_for_token)
         group_center = (min_y + max_y) / 2
-        
+
         # Plot mean line with gradient alpha
         gradient = np.linspace(0.2, 0.8, 100)
-        for i in range(len(gradient)-1):
-            y1 = min_y - bar_height + (max_y - min_y + 2*bar_height) * (i/len(gradient))
-            y2 = min_y - bar_height + (max_y - min_y + 2*bar_height) * ((i+1)/len(gradient))
-            ax.plot([mean_latency, mean_latency], [y1, y2],
-                   '-', color=STYLE_CONFIG["secondary_color"],
-                   linewidth=3, alpha=gradient[i])
-        
+        for i in range(len(gradient) - 1):
+            y1 = (
+                min_y
+                - bar_height
+                + (max_y - min_y + 2 * bar_height) * (i / len(gradient))
+            )
+            y2 = (
+                min_y
+                - bar_height
+                + (max_y - min_y + 2 * bar_height) * ((i + 1) / len(gradient))
+            )
+            ax.plot(
+                [mean_latency, mean_latency],
+                [y1, y2],
+                "-",
+                color=STYLE_CONFIG["secondary_color"],
+                linewidth=3,
+                alpha=gradient[i],
+            )
+
         # Add mean value label with background
-        label_text = f'Mean: {mean_latency:.3f}s'
+        label_text = f"Mean: {mean_latency:.3f}s"
         bbox_props = dict(
             facecolor=STYLE_CONFIG["background_color"],
             edgecolor=STYLE_CONFIG["secondary_color"],
             alpha=0.8,
             pad=3,
-            linewidth=1
+            linewidth=1,
         )
-        ax.text(mean_latency + 0.02, group_center,
-                label_text,
-                color=STYLE_CONFIG["secondary_color"],
-                va='center',
-                fontsize=10,
-                fontweight='bold',
-                bbox=bbox_props)
-    
+        ax.text(
+            mean_latency + 0.02,
+            group_center,
+            label_text,
+            color=STYLE_CONFIG["secondary_color"],
+            va="center",
+            fontsize=10,
+            fontweight="bold",
+            bbox=bbox_props,
+        )
+
     # Customize plot
     ax.set_ylim(-1, current_y)
-    ax.set_xlim(0, df['time_to_first_chunk'].max() * 1.3)  # Extra space for labels
-    
+    ax.set_xlim(0, df["time_to_first_chunk"].max() * 1.3)  # Extra space for labels
+
     # Add labels for token groups with tighter spacing
     group_positions = {}
     for tokens in unique_tokens:
-        runs = df[df['target_tokens'] == tokens]
-        y_positions_for_token = [y_positions[(tokens, run['run_number'])] for _, run in runs.iterrows()]
-        group_positions[tokens] = sum(y_positions_for_token) / len(y_positions_for_token)
-        plt.axhline(y=min(y_positions_for_token) - bar_height, 
-                   color='white', alpha=0.1, linestyle='-')
-    
+        runs = df[df["target_tokens"] == tokens]
+        y_positions_for_token = [
+            y_positions[(tokens, run["run_number"])] for _, run in runs.iterrows()
+        ]
+        group_positions[tokens] = sum(y_positions_for_token) / len(
+            y_positions_for_token
+        )
+        plt.axhline(
+            y=min(y_positions_for_token) - bar_height,
+            color="white",
+            alpha=0.1,
+            linestyle="-",
+        )
+
     # Calculate mean audio length for each token group
     audio_lengths = {}
     for tokens in unique_tokens:
-        token_runs = df[df['target_tokens'] == tokens]
-        audio_lengths[tokens] = token_runs['audio_length'].mean()
+        token_runs = df[df["target_tokens"] == tokens]
+        audio_lengths[tokens] = token_runs["audio_length"].mean()
 
     # Set y-ticks at group centers with token counts and audio lengths
     plt.yticks(
         list(group_positions.values()),
-        [f'{tokens} tokens\n({audio_lengths[tokens]:.1f}s)' for tokens in group_positions.keys()],
-        fontsize=10
+        [
+            f"{tokens} tokens\n({audio_lengths[tokens]:.1f}s)"
+            for tokens in group_positions.keys()
+        ],
+        fontsize=10,
     )
-    
+
     # Customize appearance
     setup_plot(
-        fig, ax,
-        "Time-To-Audio Latency" + suffix,
+        fig,
+        ax,
+        prefix.upper() + " Time-To-Audio Latency " + suffix,
         xlabel="Time (seconds)",
-        ylabel="Input Size"
+        ylabel="Input Size",
     )
-    
+
     plt.tight_layout()
     plt.savefig(output_path, dpi=300, bbox_inches="tight")
     plt.close()
 
+
 def plot_correlation(df, x, y, title, xlabel, ylabel, output_path):
     """Create correlation plot with regression line and correlation coefficient.
-    
+
     Args:
         df: pandas DataFrame containing the data
         x: str, column name for x-axis
@@ -277,28 +370,40 @@ def plot_correlation(df, x, y, title, xlabel, ylabel, output_path):
         output_path: str, path to save the output plot
     """
     plt.style.use("dark_background")
-    
+
     fig, ax = plt.subplots(figsize=(12, 8))
-    
+
     # Scatter plot
-    sns.scatterplot(data=df, x=x, y=y, s=100, alpha=0.6, 
-                    color=STYLE_CONFIG["primary_color"])
-    
+    sns.scatterplot(
+        data=df, x=x, y=y, s=100, alpha=0.6, color=STYLE_CONFIG["primary_color"]
+    )
+
     # Regression line
-    sns.regplot(data=df, x=x, y=y, scatter=False, 
-                color=STYLE_CONFIG["secondary_color"], 
-                line_kws={"linewidth": 2})
-    
+    sns.regplot(
+        data=df,
+        x=x,
+        y=y,
+        scatter=False,
+        color=STYLE_CONFIG["secondary_color"],
+        line_kws={"linewidth": 2},
+    )
+
     # Add correlation coefficient
     corr = df[x].corr(df[y])
-    plt.text(0.05, 0.95, f"Correlation: {corr:.2f}", 
-             transform=ax.transAxes, 
-             fontsize=STYLE_CONFIG["font_sizes"]["text"], 
-             color=STYLE_CONFIG["text_color"],
-             bbox=dict(facecolor=STYLE_CONFIG["background_color"], 
-                      edgecolor=STYLE_CONFIG["text_color"], 
-                      alpha=0.7))
-    
+    plt.text(
+        0.05,
+        0.95,
+        f"Correlation: {corr:.2f}",
+        transform=ax.transAxes,
+        fontsize=STYLE_CONFIG["font_sizes"]["text"],
+        color=STYLE_CONFIG["text_color"],
+        bbox=dict(
+            facecolor=STYLE_CONFIG["background_color"],
+            edgecolor=STYLE_CONFIG["text_color"],
+            alpha=0.7,
+        ),
+    )
+
     setup_plot(fig, ax, title, xlabel=xlabel, ylabel=ylabel)
     plt.savefig(output_path, dpi=300, bbox_inches="tight")
     plt.close()
diff --git a/examples/assorted_checks/benchmarks/lib/shared_utils.py b/examples/assorted_checks/benchmarks/lib/shared_utils.py
index a9c872e..1e3fbdb 100644
--- a/examples/assorted_checks/benchmarks/lib/shared_utils.py
+++ b/examples/assorted_checks/benchmarks/lib/shared_utils.py
@@ -1,9 +1,10 @@
 """Shared utilities for benchmarks and tests."""
+
 import os
 import json
 import subprocess
+from typing import Any, Dict, List, Union, Optional
 from datetime import datetime
-from typing import Any, Dict, List, Optional, Union
 
 import psutil
 import scipy.io.wavfile as wavfile
@@ -12,28 +13,46 @@ import scipy.io.wavfile as wavfile
 TORCH_AVAILABLE = False
 try:
     import torch
+
     TORCH_AVAILABLE = torch.cuda.is_available()
 except ImportError:
     pass
 
 
+def check_audio_file_is_silent(audio_path: str, threshold: float = 0.01) -> bool:
+    """Check if an audio file is silent by comparing peak amplitude to a threshold.
+
+    Args:
+        audio_path: Path to the audio file
+        threshold: Peak amplitude threshold for silence
+
+    Returns:
+        bool: True if audio is silent, False otherwise
+    """
+    rate, data = wavfile.read(audio_path)
+    peak_amplitude = max(abs(data.min()), abs(data.max())) / 32768.0  # 16-bit audio
+
+    return peak_amplitude < threshold
+
+
 def get_audio_length(audio_data: bytes, temp_dir: str = None) -> float:
     """Get audio length in seconds from bytes data.
-    
+
     Args:
         audio_data: Raw audio bytes
         temp_dir: Directory for temporary file. If None, uses system temp directory.
-        
+
     Returns:
         float: Audio length in seconds
     """
     if temp_dir is None:
         import tempfile
+
         temp_dir = tempfile.gettempdir()
-    
+
     temp_path = os.path.join(temp_dir, "temp.wav")
     os.makedirs(temp_dir, exist_ok=True)
-    
+
     with open(temp_path, "wb") as f:
         f.write(audio_data)
 
@@ -47,11 +66,11 @@ def get_audio_length(audio_data: bytes, temp_dir: str = None) -> float:
 
 def get_gpu_memory(average: bool = True) -> Optional[Union[float, List[float]]]:
     """Get GPU memory usage using PyTorch if available, falling back to nvidia-smi.
-    
+
     Args:
         average: If True and multiple GPUs present, returns average memory usage.
                 If False, returns list of memory usage per GPU.
-    
+
     Returns:
         float or List[float] or None: GPU memory usage in MB. Returns None if no GPU available.
         If average=False and multiple GPUs present, returns list of values.
@@ -60,19 +79,23 @@ def get_gpu_memory(average: bool = True) -> Optional[Union[float, List[float]]]:
         n_gpus = torch.cuda.device_count()
         memory_used = []
         for i in range(n_gpus):
-            memory_used.append(torch.cuda.memory_allocated(i) / 1024**2)  # Convert to MB
-        
+            memory_used.append(
+                torch.cuda.memory_allocated(i) / 1024**2
+            )  # Convert to MB
+
         if average and len(memory_used) > 0:
             return sum(memory_used) / len(memory_used)
         return memory_used if len(memory_used) > 1 else memory_used[0]
-    
+
     # Fall back to nvidia-smi
     try:
         result = subprocess.check_output(
             ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,nounits,noheader"]
         )
-        memory_values = [float(x.strip()) for x in result.decode("utf-8").split("\n") if x.strip()]
-        
+        memory_values = [
+            float(x.strip()) for x in result.decode("utf-8").split("\n") if x.strip()
+        ]
+
         if average and len(memory_values) > 0:
             return sum(memory_values) / len(memory_values)
         return memory_values if len(memory_values) > 1 else memory_values[0]
@@ -82,14 +105,14 @@ def get_gpu_memory(average: bool = True) -> Optional[Union[float, List[float]]]:
 
 def get_system_metrics() -> Dict[str, Union[str, float]]:
     """Get current system metrics including CPU, RAM, and GPU if available.
-    
+
     Returns:
         dict: System metrics including timestamp, CPU%, RAM%, RAM GB, and GPU MB if available
     """
     # Get per-CPU percentages and calculate average
     cpu_percentages = psutil.cpu_percent(percpu=True)
     avg_cpu = sum(cpu_percentages) / len(cpu_percentages)
-    
+
     metrics = {
         "timestamp": datetime.now().isoformat(),
         "cpu_percent": round(avg_cpu, 2),
@@ -106,40 +129,40 @@ def get_system_metrics() -> Dict[str, Union[str, float]]:
 
 def save_audio_file(audio_data: bytes, identifier: str, output_dir: str) -> str:
     """Save audio data to a file with proper naming and directory creation.
-    
+
     Args:
         audio_data: Raw audio bytes
         identifier: String to identify this audio file (e.g. token count, test name)
         output_dir: Directory to save the file
-        
+
     Returns:
         str: Path to the saved audio file
     """
     os.makedirs(output_dir, exist_ok=True)
     output_file = os.path.join(output_dir, f"{identifier}.wav")
-    
+
     with open(output_file, "wb") as f:
         f.write(audio_data)
-        
+
     return output_file
 
 
 def write_benchmark_stats(stats: List[Dict[str, Any]], output_file: str) -> None:
     """Write benchmark statistics to a file in a clean, organized format.
-    
+
     Args:
         stats: List of dictionaries containing stat name/value pairs
         output_file: Path to output file
     """
     os.makedirs(os.path.dirname(output_file), exist_ok=True)
-    
+
     with open(output_file, "w") as f:
         for section in stats:
             # Write section header
             f.write(f"=== {section['title']} ===\n\n")
-            
+
             # Write stats
-            for label, value in section['stats'].items():
+            for label, value in section["stats"].items():
                 if isinstance(value, float):
                     f.write(f"{label}: {value:.2f}\n")
                 else:
@@ -149,7 +172,7 @@ def write_benchmark_stats(stats: List[Dict[str, Any]], output_file: str) -> None
 
 def save_json_results(results: Dict[str, Any], output_file: str) -> None:
     """Save benchmark results to a JSON file with proper formatting.
-    
+
     Args:
         results: Dictionary of results to save
         output_file: Path to output file
@@ -159,14 +182,16 @@ def save_json_results(results: Dict[str, Any], output_file: str) -> None:
         json.dump(results, f, indent=2)
 
 
-def real_time_factor(processing_time: float, audio_length: float, decimals: int = 2) -> float:
+def real_time_factor(
+    processing_time: float, audio_length: float, decimals: int = 2
+) -> float:
     """Calculate Real-Time Factor (RTF) as processing-time / length-of-audio.
-    
+
     Args:
         processing_time: Time taken to process/generate audio
         audio_length: Length of the generated audio
         decimals: Number of decimal places to round to
-        
+
     Returns:
         float: RTF value
     """
diff --git a/examples/assorted_checks/benchmarks/lib/stream_utils.py b/examples/assorted_checks/benchmarks/lib/stream_utils.py
new file mode 100644
index 0000000..623b18a
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/lib/stream_utils.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+import os
+import time
+import wave
+from typing import Any, Dict, List, Callable, Optional
+
+import pandas as pd
+import scipy.io.wavfile as wavfile
+
+from .shared_utils import save_json_results
+from .shared_plotting import plot_timeline, plot_correlation
+from .shared_benchmark_utils import enc, get_text_for_tokens
+
+
+def check_audio_silence(audio_path: str, threshold: float = 50.0) -> bool:
+    """Return True when the WAV file at ``audio_path`` is effectively silent.
+
+    The RMS of all samples is compared against ``threshold``, which is in raw
+    sample units (NOTE(review): 50.0 presumably targets int16 PCM - confirm).
+    A file with no samples counts as silent instead of yielding a NaN RMS.
+    """
+    _, samples = wavfile.read(audio_path)
+    rms = float((samples.astype(float) ** 2).mean() ** 0.5) if samples.size else 0.0
+    return rms < threshold
+
+
+def process_benchmark_results(
+    all_results: List[Dict[str, Any]], token_sizes: List[int]
+) -> Dict[str, Any]:
+    """Average the successful runs recorded for each requested token size.
+
+    A run counts when its "target_tokens" equals the size and its "error" is
+    falsy; sizes with no such run are omitted. Averages are rounded to 3 dp.
+    """
+    averaged = (
+        ("avg_time_to_first_chunk", "time_to_first_chunk"),
+        ("avg_total_time", "total_time"),
+        ("avg_audio_length", "audio_length"),
+    )
+    report = {}
+    for size in token_sizes:
+        good = [
+            r for r in all_results if r["target_tokens"] == size and not r["error"]
+        ]
+        if not good:
+            continue
+        entry = {}
+        for out_key, run_key in averaged:
+            entry[out_key] = round(sum(r[run_key] for r in good) / len(good), 3)
+        entry["num_successful_runs"] = len(good)
+        report[size] = entry
+    return report
+
+
+def save_benchmark_results(
+    all_results: List[Dict[str, Any]],
+    summary: Dict[str, Any],
+    output_data_dir: str,
+    output_plots_dir: str,
+    suffix: str,
+    plot_title_suffix: str,
+    prefix: str = "",
+):
+    """Write the benchmark JSON report and render the three result plots.
+
+    The report goes to ``output_data_dir`` and the plots to ``output_plots_dir``;
+    file names are wrapped in ``prefix``/``suffix``. ``plot_title_suffix`` ends
+    the correlation plot titles and is forwarded to ``plot_timeline``.
+    """
+    json_path = os.path.join(
+        output_data_dir, f"{prefix}first_token_benchmark{suffix}.json"
+    )
+    payload = {
+        "individual_runs": all_results,
+        "summary": summary,
+        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
+    }
+    save_json_results(payload, json_path)
+
+    frame = pd.DataFrame(all_results)
+    # Trailing positional arguments of each plot_correlation call, in order.
+    correlation_plots = [
+        (
+            "time_to_first_chunk",
+            f"Time to First Audio vs Input Size {plot_title_suffix}",
+            "Number of Input Tokens",
+            "Time to First Audio (seconds)",
+            os.path.join(output_plots_dir, f"{prefix}first_token_latency{suffix}.png"),
+        ),
+        (
+            "total_time",
+            f"Total Time vs Input Size {plot_title_suffix}",
+            "Number of Input Tokens",
+            "Total Time (seconds)",
+            os.path.join(output_plots_dir, f"{prefix}total_time_latency{suffix}.png"),
+        ),
+    ]
+    for plot_args in correlation_plots:
+        plot_correlation(frame, "target_tokens", *plot_args)
+
+    timeline_png = os.path.join(
+        output_plots_dir, f"{prefix}first_token_timeline{suffix}.png"
+    )
+    plot_timeline(frame, timeline_png, suffix=plot_title_suffix)
+
+
+def _format_seconds(value: Optional[float]) -> str:
+    """Render a duration as e.g. ``1.234s``, or ``N/A`` when it is None."""
+    return f"{value:.3f}s" if value is not None else "N/A"
+
+
+def run_benchmark(
+    measure_func: Callable,
+    output_dir: str,
+    output_data_dir: str,
+    output_plots_dir: str,
+    suffix: str = "",
+    plot_title_suffix: str = "",
+    num_runs: int = 5,
+    client=None,
+    prefix="",
+):
+    """Benchmark ``measure_func`` over fixed token sizes and save the results.
+
+    Text samples are cut from ``the_time_machine_hg_wells.txt`` (read from the
+    parent of this module's directory). Results are summarised, written as
+    JSON, plotted, and any audio found to be silent is listed at the end.
+
+    Args:
+        measure_func: Called as ``measure_func(text, output_dir, tokens, run)``;
+            returns a dict with ``error`` and ``audio_path`` and, optionally,
+            ``time_to_first_chunk``, ``total_time`` and ``audio_length``.
+        output_dir: Directory handed to ``measure_func``; created if missing.
+        output_data_dir: Directory for the JSON report; created if missing.
+        output_plots_dir: Directory for the plots; created if missing.
+        suffix: Appended to every output file name.
+        plot_title_suffix: Appended to plot titles and progress messages.
+        num_runs: Number of measurements per token size.
+        client: Not used by this function.
+        prefix: Prepended to every output file name.
+    """
+    for directory in (output_dir, output_data_dir, output_plots_dir):
+        os.makedirs(directory, exist_ok=True)
+
+    script_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    sample_path = os.path.join(script_dir, "the_time_machine_hg_wells.txt")
+    with open(sample_path, "r", encoding="utf-8") as f:
+        text = f.read()
+
+    token_sizes = [10, 50, 100, 250, 500]
+    all_results = []
+    silent_files = []
+    title_note = f" {plot_title_suffix}" if plot_title_suffix else ""
+
+    for tokens in token_sizes:
+        print(f"\nTesting {tokens} tokens{title_note}")
+        test_text = get_text_for_tokens(text, tokens)
+        actual_tokens = len(enc.encode(test_text))
+        print(f"Text preview: {test_text[:50]}...")
+
+        for run_number in range(1, num_runs + 1):
+            print(f"Run {run_number}/{num_runs}...")
+            result = measure_func(test_text, output_dir, tokens, run_number)
+            result["target_tokens"] = tokens
+            result["actual_tokens"] = actual_tokens
+            result["run_number"] = run_number
+            # Metrics may be absent or None; those are reported as N/A.
+            first_chunk = result.get("time_to_first_chunk")
+            total_time = result.get("total_time")
+            print(f"Time to First Audio: {_format_seconds(first_chunk)}")
+            print(f"Time to Save Complete: {_format_seconds(total_time)}")
+            print(f"Audio length: {_format_seconds(result.get('audio_length'))}")
+            has_both = total_time is not None and first_chunk is not None
+            overhead = total_time - first_chunk if has_both else None
+            print(f"Streaming overhead: {_format_seconds(overhead)}")
+
+            if result["error"]:
+                print(f"Error: {result['error']}")
+            elif result["audio_path"] and check_audio_silence(result["audio_path"]):
+                silent_files.append(result["audio_path"])
+            all_results.append(result)
+
+    summary = process_benchmark_results(all_results, token_sizes)
+    # Forward prefix so the files written match the paths reported below.
+    save_benchmark_results(
+        all_results,
+        summary,
+        output_data_dir,
+        output_plots_dir,
+        suffix,
+        plot_title_suffix,
+        prefix,
+    )
+
+    print("\nResults and plots saved to:")
+    print(f"- {os.path.join(output_data_dir, f'{prefix}first_token_benchmark{suffix}.json')}")
+    for stem in ("first_token_latency", "total_time_latency", "first_token_timeline"):
+        print(f"- {os.path.join(output_plots_dir, f'{prefix}{stem}{suffix}.png')}")
+
+    if silent_files:
+        print("\nWARNING: The following files contain only silence:")
+        for file in silent_files:
+            print(f"- {file}")
+    else:
+        print("\nAll generated audio files contain valid audio content.")
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results.json
deleted file mode 100644
index 5c60933..0000000
--- a/examples/assorted_checks/benchmarks/output_data/benchmark_results.json
+++ /dev/null
@@ -1,111 +0,0 @@
-{
-  "results": [
-    {
-      "tokens": 100,
-      "processing_time": 18.833295583724976,
-      "output_length": 31.15,
-      "realtime_factor": 1.6539856161403135,
-      "elapsed_time": 19.024322748184204
-    },
-    {
-      "tokens": 200,
-      "processing_time": 38.95506024360657,
-      "output_length": 62.6,
-      "realtime_factor": 1.6069799304257042,
-      "elapsed_time": 58.21527123451233
-    },
-    {
-      "tokens": 300,
-      "processing_time": 49.74252939224243,
-      "output_length": 96.325,
-      "realtime_factor": 1.9364716908630366,
-      "elapsed_time": 108.19673728942871
-    },
-    {
-      "tokens": 400,
-      "processing_time": 61.349056243896484,
-      "output_length": 128.575,
-      "realtime_factor": 2.095794261102292,
-      "elapsed_time": 169.733656167984
-    },
-    {
-      "tokens": 500,
-      "processing_time": 82.86568236351013,
-      "output_length": 158.575,
-      "realtime_factor": 1.9136389815071193,
-      "elapsed_time": 252.7968451976776
-    }
-  ],
-  "system_metrics": [
-    {
-      "timestamp": "2025-01-03T00:13:49.865330",
-      "cpu_percent": 8.0,
-      "ram_percent": 39.4,
-      "ram_used_gb": 25.03811264038086,
-      "gpu_memory_used": 1204.0
-    },
-    {
-      "timestamp": "2025-01-03T00:14:08.781551",
-      "cpu_percent": 26.8,
-      "ram_percent": 42.6,
-      "ram_used_gb": 27.090862274169922,
-      "gpu_memory_used": 1225.0
-    },
-    {
-      "timestamp": "2025-01-03T00:14:08.916973",
-      "cpu_percent": 16.1,
-      "ram_percent": 42.6,
-      "ram_used_gb": 27.089553833007812,
-      "gpu_memory_used": 1225.0
-    },
-    {
-      "timestamp": "2025-01-03T00:14:47.979053",
-      "cpu_percent": 31.5,
-      "ram_percent": 43.6,
-      "ram_used_gb": 27.714427947998047,
-      "gpu_memory_used": 1225.0
-    },
-    {
-      "timestamp": "2025-01-03T00:14:48.098976",
-      "cpu_percent": 20.0,
-      "ram_percent": 43.6,
-      "ram_used_gb": 27.704315185546875,
-      "gpu_memory_used": 1211.0
-    },
-    {
-      "timestamp": "2025-01-03T00:15:37.944729",
-      "cpu_percent": 29.7,
-      "ram_percent": 38.6,
-      "ram_used_gb": 24.53925323486328,
-      "gpu_memory_used": 1217.0
-    },
-    {
-      "timestamp": "2025-01-03T00:15:38.071915",
-      "cpu_percent": 8.6,
-      "ram_percent": 38.5,
-      "ram_used_gb": 24.51690673828125,
-      "gpu_memory_used": 1208.0
-    },
-    {
-      "timestamp": "2025-01-03T00:16:39.525449",
-      "cpu_percent": 23.4,
-      "ram_percent": 38.8,
-      "ram_used_gb": 24.71230697631836,
-      "gpu_memory_used": 1221.0
-    },
-    {
-      "timestamp": "2025-01-03T00:16:39.612442",
-      "cpu_percent": 5.5,
-      "ram_percent": 38.9,
-      "ram_used_gb": 24.72066879272461,
-      "gpu_memory_used": 1221.0
-    },
-    {
-      "timestamp": "2025-01-03T00:18:02.569076",
-      "cpu_percent": 27.4,
-      "ram_percent": 39.1,
-      "ram_used_gb": 24.868202209472656,
-      "gpu_memory_used": 1264.0
-    }
-  ]
-}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json
deleted file mode 100644
index 52f8f04..0000000
--- a/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json
+++ /dev/null
@@ -1,216 +0,0 @@
-{
-  "results": [
-    {
-      "tokens": 100,
-      "processing_time": 14.349808931350708,
-      "output_length": 31.15,
-      "rtf": 0.46,
-      "elapsed_time": 14.716031074523926
-    },
-    {
-      "tokens": 200,
-      "processing_time": 28.341803312301636,
-      "output_length": 62.6,
-      "rtf": 0.45,
-      "elapsed_time": 43.44207406044006
-    },
-    {
-      "tokens": 300,
-      "processing_time": 43.352553606033325,
-      "output_length": 96.325,
-      "rtf": 0.45,
-      "elapsed_time": 87.26906609535217
-    },
-    {
-      "tokens": 400,
-      "processing_time": 71.02449822425842,
-      "output_length": 128.575,
-      "rtf": 0.55,
-      "elapsed_time": 158.7198133468628
-    },
-    {
-      "tokens": 500,
-      "processing_time": 70.92521691322327,
-      "output_length": 158.575,
-      "rtf": 0.45,
-      "elapsed_time": 230.01379895210266
-    },
-    {
-      "tokens": 600,
-      "processing_time": 83.6328592300415,
-      "output_length": 189.25,
-      "rtf": 0.44,
-      "elapsed_time": 314.02610969543457
-    },
-    {
-      "tokens": 700,
-      "processing_time": 103.0810194015503,
-      "output_length": 222.075,
-      "rtf": 0.46,
-      "elapsed_time": 417.5678551197052
-    },
-    {
-      "tokens": 800,
-      "processing_time": 127.02162909507751,
-      "output_length": 253.85,
-      "rtf": 0.5,
-      "elapsed_time": 545.0128681659698
-    },
-    {
-      "tokens": 900,
-      "processing_time": 130.49781227111816,
-      "output_length": 283.775,
-      "rtf": 0.46,
-      "elapsed_time": 675.8943417072296
-    },
-    {
-      "tokens": 1000,
-      "processing_time": 154.76425909996033,
-      "output_length": 315.475,
-      "rtf": 0.49,
-      "elapsed_time": 831.0677945613861
-    }
-  ],
-  "system_metrics": [
-    {
-      "timestamp": "2025-01-03T00:23:52.896889",
-      "cpu_percent": 4.5,
-      "ram_percent": 39.1,
-      "ram_used_gb": 24.86032485961914,
-      "gpu_memory_used": 1281.0
-    },
-    {
-      "timestamp": "2025-01-03T00:24:07.429461",
-      "cpu_percent": 4.5,
-      "ram_percent": 39.1,
-      "ram_used_gb": 24.847564697265625,
-      "gpu_memory_used": 1285.0
-    },
-    {
-      "timestamp": "2025-01-03T00:24:07.620587",
-      "cpu_percent": 2.7,
-      "ram_percent": 39.1,
-      "ram_used_gb": 24.846607208251953,
-      "gpu_memory_used": 1275.0
-    },
-    {
-      "timestamp": "2025-01-03T00:24:36.140754",
-      "cpu_percent": 5.4,
-      "ram_percent": 39.1,
-      "ram_used_gb": 24.857810974121094,
-      "gpu_memory_used": 1267.0
-    },
-    {
-      "timestamp": "2025-01-03T00:24:36.340675",
-      "cpu_percent": 6.2,
-      "ram_percent": 39.1,
-      "ram_used_gb": 24.85773468017578,
-      "gpu_memory_used": 1267.0
-    },
-    {
-      "timestamp": "2025-01-03T00:25:19.905634",
-      "cpu_percent": 29.1,
-      "ram_percent": 39.2,
-      "ram_used_gb": 24.920318603515625,
-      "gpu_memory_used": 1256.0
-    },
-    {
-      "timestamp": "2025-01-03T00:25:20.182219",
-      "cpu_percent": 20.0,
-      "ram_percent": 39.2,
-      "ram_used_gb": 24.930198669433594,
-      "gpu_memory_used": 1256.0
-    },
-    {
-      "timestamp": "2025-01-03T00:26:31.414760",
-      "cpu_percent": 5.3,
-      "ram_percent": 39.5,
-      "ram_used_gb": 25.127891540527344,
-      "gpu_memory_used": 1259.0
-    },
-    {
-      "timestamp": "2025-01-03T00:26:31.617256",
-      "cpu_percent": 3.6,
-      "ram_percent": 39.5,
-      "ram_used_gb": 25.126346588134766,
-      "gpu_memory_used": 1252.0
-    },
-    {
-      "timestamp": "2025-01-03T00:27:42.736097",
-      "cpu_percent": 10.5,
-      "ram_percent": 39.5,
-      "ram_used_gb": 25.100231170654297,
-      "gpu_memory_used": 1249.0
-    },
-    {
-      "timestamp": "2025-01-03T00:27:42.912870",
-      "cpu_percent": 5.3,
-      "ram_percent": 39.5,
-      "ram_used_gb": 25.098285675048828,
-      "gpu_memory_used": 1249.0
-    },
-    {
-      "timestamp": "2025-01-03T00:29:06.725264",
-      "cpu_percent": 8.9,
-      "ram_percent": 39.5,
-      "ram_used_gb": 25.123123168945312,
-      "gpu_memory_used": 1239.0
-    },
-    {
-      "timestamp": "2025-01-03T00:29:06.928826",
-      "cpu_percent": 5.5,
-      "ram_percent": 39.5,
-      "ram_used_gb": 25.128646850585938,
-      "gpu_memory_used": 1239.0
-    },
-    {
-      "timestamp": "2025-01-03T00:30:50.206349",
-      "cpu_percent": 49.6,
-      "ram_percent": 39.6,
-      "ram_used_gb": 25.162948608398438,
-      "gpu_memory_used": 1245.0
-    },
-    {
-      "timestamp": "2025-01-03T00:30:50.491837",
-      "cpu_percent": 14.8,
-      "ram_percent": 39.5,
-      "ram_used_gb": 25.13379669189453,
-      "gpu_memory_used": 1245.0
-    },
-    {
-      "timestamp": "2025-01-03T00:32:57.721467",
-      "cpu_percent": 6.2,
-      "ram_percent": 39.6,
-      "ram_used_gb": 25.187721252441406,
-      "gpu_memory_used": 1384.0
-    },
-    {
-      "timestamp": "2025-01-03T00:32:57.913350",
-      "cpu_percent": 3.6,
-      "ram_percent": 39.6,
-      "ram_used_gb": 25.199390411376953,
-      "gpu_memory_used": 1384.0
-    },
-    {
-      "timestamp": "2025-01-03T00:35:08.608730",
-      "cpu_percent": 6.3,
-      "ram_percent": 39.8,
-      "ram_used_gb": 25.311710357666016,
-      "gpu_memory_used": 1330.0
-    },
-    {
-      "timestamp": "2025-01-03T00:35:08.791851",
-      "cpu_percent": 5.3,
-      "ram_percent": 39.8,
-      "ram_used_gb": 25.326683044433594,
-      "gpu_memory_used": 1333.0
-    },
-    {
-      "timestamp": "2025-01-03T00:37:43.782406",
-      "cpu_percent": 6.8,
-      "ram_percent": 40.6,
-      "ram_used_gb": 25.803058624267578,
-      "gpu_memory_used": 1409.0
-    }
-  ]
-}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json
deleted file mode 100644
index 59ad009..0000000
--- a/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json
+++ /dev/null
@@ -1,300 +0,0 @@
-{
-  "results": [
-    {
-      "tokens": 100,
-      "processing_time": 0.96,
-      "output_length": 31.1,
-      "rtf": 0.03,
-      "elapsed_time": 1.11
-    },
-    {
-      "tokens": 250,
-      "processing_time": 2.23,
-      "output_length": 77.17,
-      "rtf": 0.03,
-      "elapsed_time": 3.49
-    },
-    {
-      "tokens": 400,
-      "processing_time": 4.05,
-      "output_length": 128.05,
-      "rtf": 0.03,
-      "elapsed_time": 7.77
-    },
-    {
-      "tokens": 550,
-      "processing_time": 4.06,
-      "output_length": 171.45,
-      "rtf": 0.02,
-      "elapsed_time": 12.0
-    },
-    {
-      "tokens": 700,
-      "processing_time": 6.01,
-      "output_length": 221.6,
-      "rtf": 0.03,
-      "elapsed_time": 18.16
-    },
-    {
-      "tokens": 850,
-      "processing_time": 6.9,
-      "output_length": 269.1,
-      "rtf": 0.03,
-      "elapsed_time": 25.21
-    },
-    {
-      "tokens": 1000,
-      "processing_time": 7.65,
-      "output_length": 315.05,
-      "rtf": 0.02,
-      "elapsed_time": 33.03
-    },
-    {
-      "tokens": 6000,
-      "processing_time": 48.7,
-      "output_length": 1837.1,
-      "rtf": 0.03,
-      "elapsed_time": 82.21
-    },
-    {
-      "tokens": 11000,
-      "processing_time": 92.44,
-      "output_length": 3388.57,
-      "rtf": 0.03,
-      "elapsed_time": 175.46
-    },
-    {
-      "tokens": 16000,
-      "processing_time": 163.61,
-      "output_length": 4977.32,
-      "rtf": 0.03,
-      "elapsed_time": 340.46
-    },
-    {
-      "tokens": 21000,
-      "processing_time": 209.72,
-      "output_length": 6533.3,
-      "rtf": 0.03,
-      "elapsed_time": 551.92
-    },
-    {
-      "tokens": 26000,
-      "processing_time": 329.35,
-      "output_length": 8068.15,
-      "rtf": 0.04,
-      "elapsed_time": 883.37
-    },
-    {
-      "tokens": 31000,
-      "processing_time": 473.52,
-      "output_length": 9611.48,
-      "rtf": 0.05,
-      "elapsed_time": 1359.28
-    },
-    {
-      "tokens": 36000,
-      "processing_time": 650.98,
-      "output_length": 11157.15,
-      "rtf": 0.06,
-      "elapsed_time": 2012.9
-    }
-  ],
-  "system_metrics": [
-    {
-      "timestamp": "2025-01-03T14:41:01.331735",
-      "cpu_percent": 7.5,
-      "ram_percent": 50.2,
-      "ram_used_gb": 31.960269927978516,
-      "gpu_memory_used": 3191.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:02.357116",
-      "cpu_percent": 17.01,
-      "ram_percent": 50.2,
-      "ram_used_gb": 31.96163558959961,
-      "gpu_memory_used": 3426.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:02.445009",
-      "cpu_percent": 9.5,
-      "ram_percent": 50.3,
-      "ram_used_gb": 31.966781616210938,
-      "gpu_memory_used": 3426.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:04.742152",
-      "cpu_percent": 18.27,
-      "ram_percent": 50.4,
-      "ram_used_gb": 32.08788299560547,
-      "gpu_memory_used": 3642.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:04.847795",
-      "cpu_percent": 16.27,
-      "ram_percent": 50.5,
-      "ram_used_gb": 32.094364166259766,
-      "gpu_memory_used": 3640.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:09.019590",
-      "cpu_percent": 15.97,
-      "ram_percent": 50.7,
-      "ram_used_gb": 32.23244094848633,
-      "gpu_memory_used": 3640.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:09.110324",
-      "cpu_percent": 3.54,
-      "ram_percent": 50.7,
-      "ram_used_gb": 32.234458923339844,
-      "gpu_memory_used": 3640.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:13.252607",
-      "cpu_percent": 13.4,
-      "ram_percent": 50.6,
-      "ram_used_gb": 32.194271087646484,
-      "gpu_memory_used": 3935.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:13.327557",
-      "cpu_percent": 4.69,
-      "ram_percent": 50.6,
-      "ram_used_gb": 32.191776275634766,
-      "gpu_memory_used": 3935.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:19.413633",
-      "cpu_percent": 12.92,
-      "ram_percent": 50.9,
-      "ram_used_gb": 32.3467903137207,
-      "gpu_memory_used": 4250.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:19.492758",
-      "cpu_percent": 7.5,
-      "ram_percent": 50.8,
-      "ram_used_gb": 32.34375,
-      "gpu_memory_used": 4250.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:26.467284",
-      "cpu_percent": 13.09,
-      "ram_percent": 51.2,
-      "ram_used_gb": 32.56281280517578,
-      "gpu_memory_used": 4249.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:26.553559",
-      "cpu_percent": 8.39,
-      "ram_percent": 51.2,
-      "ram_used_gb": 32.56183624267578,
-      "gpu_memory_used": 4249.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:34.284362",
-      "cpu_percent": 12.61,
-      "ram_percent": 51.7,
-      "ram_used_gb": 32.874778747558594,
-      "gpu_memory_used": 4250.0
-    },
-    {
-      "timestamp": "2025-01-03T14:41:34.362353",
-      "cpu_percent": 1.25,
-      "ram_percent": 51.7,
-      "ram_used_gb": 32.87461471557617,
-      "gpu_memory_used": 4250.0
-    },
-    {
-      "timestamp": "2025-01-03T14:42:23.471312",
-      "cpu_percent": 11.64,
-      "ram_percent": 54.9,
-      "ram_used_gb": 34.90264129638672,
-      "gpu_memory_used": 4647.0
-    },
-    {
-      "timestamp": "2025-01-03T14:42:23.547203",
-      "cpu_percent": 5.31,
-      "ram_percent": 54.9,
-      "ram_used_gb": 34.91563415527344,
-      "gpu_memory_used": 4647.0
-    },
-    {
-      "timestamp": "2025-01-03T14:43:56.724933",
-      "cpu_percent": 12.97,
-      "ram_percent": 59.5,
-      "ram_used_gb": 37.84241485595703,
-      "gpu_memory_used": 4655.0
-    },
-    {
-      "timestamp": "2025-01-03T14:43:56.815453",
-      "cpu_percent": 11.75,
-      "ram_percent": 59.5,
-      "ram_used_gb": 37.832679748535156,
-      "gpu_memory_used": 4655.0
-    },
-    {
-      "timestamp": "2025-01-03T14:46:41.705155",
-      "cpu_percent": 12.94,
-      "ram_percent": 66.3,
-      "ram_used_gb": 42.1534538269043,
-      "gpu_memory_used": 4729.0
-    },
-    {
-      "timestamp": "2025-01-03T14:46:41.835177",
-      "cpu_percent": 7.73,
-      "ram_percent": 66.2,
-      "ram_used_gb": 42.13554000854492,
-      "gpu_memory_used": 4729.0
-    },
-    {
-      "timestamp": "2025-01-03T14:50:13.166236",
-      "cpu_percent": 11.62,
-      "ram_percent": 73.4,
-      "ram_used_gb": 46.71288299560547,
-      "gpu_memory_used": 4676.0
-    },
-    {
-      "timestamp": "2025-01-03T14:50:13.261611",
-      "cpu_percent": 8.16,
-      "ram_percent": 73.4,
-      "ram_used_gb": 46.71356201171875,
-      "gpu_memory_used": 4676.0
-    },
-    {
-      "timestamp": "2025-01-03T14:55:44.623607",
-      "cpu_percent": 12.92,
-      "ram_percent": 82.8,
-      "ram_used_gb": 52.65533447265625,
-      "gpu_memory_used": 4636.0
-    },
-    {
-      "timestamp": "2025-01-03T14:55:44.735410",
-      "cpu_percent": 15.29,
-      "ram_percent": 82.7,
-      "ram_used_gb": 52.63290786743164,
-      "gpu_memory_used": 4636.0
-    },
-    {
-      "timestamp": "2025-01-03T15:03:40.534449",
-      "cpu_percent": 13.88,
-      "ram_percent": 85.0,
-      "ram_used_gb": 54.050071716308594,
-      "gpu_memory_used": 4771.0
-    },
-    {
-      "timestamp": "2025-01-03T15:03:40.638708",
-      "cpu_percent": 12.21,
-      "ram_percent": 85.0,
-      "ram_used_gb": 54.053733825683594,
-      "gpu_memory_used": 4771.0
-    },
-    {
-      "timestamp": "2025-01-03T15:14:34.159142",
-      "cpu_percent": 14.51,
-      "ram_percent": 78.1,
-      "ram_used_gb": 49.70396423339844,
-      "gpu_memory_used": 4739.0
-    }
-  ]
-}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt
deleted file mode 100644
index 010d116..0000000
--- a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-=== Benchmark Statistics (with correct RTF) ===
-
-Overall Stats:
-Total tokens processed: 5500
-Total audio generated: 1741.65s
-Total test duration: 831.07s
-Average processing rate: 6.72 tokens/second
-Average RTF: 0.47x
-
-Per-chunk Stats:
-Average chunk size: 550.00 tokens
-Min chunk size: 100.00 tokens
-Max chunk size: 1000.00 tokens
-Average processing time: 82.70s
-Average output length: 174.17s
-
-Performance Ranges:
-Processing rate range: 5.63 - 7.17 tokens/second
-RTF range: 0.44x - 0.55x
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt
deleted file mode 100644
index e7bed5f..0000000
--- a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-=== Benchmark Statistics (with correct RTF) ===
-
-Overall Stats:
-Total tokens processed: 150850
-Total audio generated: 46786.59s
-Total test duration: 2012.90s
-Average processing rate: 104.34 tokens/second
-Average RTF: 0.03x
-
diff --git a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json
deleted file mode 100644
index edcb334..0000000
--- a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json
+++ /dev/null
@@ -1,1804 +0,0 @@
-{
-  "results": [
-    {
-      "tokens": 300,
-      "processing_time": 41.62,
-      "output_length": 96.425,
-      "rtf": 0.43,
-      "elapsed_time": 41.68
-    },
-    {
-      "tokens": 600,
-      "processing_time": 81.72,
-      "output_length": 188.675,
-      "rtf": 0.43,
-      "elapsed_time": 123.49
-    },
-    {
-      "tokens": 900,
-      "processing_time": 120.55,
-      "output_length": 283.425,
-      "rtf": 0.43,
-      "elapsed_time": 244.1
-    }
-  ],
-  "system_metrics": [
-    {
-      "timestamp": "2025-01-04T01:30:26.991154",
-      "cpu_percent": 7.83,
-      "ram_percent": 48.2,
-      "ram_used_gb": 30.669906616210938,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 0.07800030708312988
-    },
-    {
-      "timestamp": "2025-01-04T01:30:28.079669",
-      "cpu_percent": 59.43,
-      "ram_percent": 48.2,
-      "ram_used_gb": 30.675106048583984,
-      "gpu_memory_used": 1244.0,
-      "relative_time": 1.1842052936553955
-    },
-    {
-      "timestamp": "2025-01-04T01:30:29.185881",
-      "cpu_percent": 57.14,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.473060607910156,
-      "gpu_memory_used": 1246.0,
-      "relative_time": 2.31345796585083
-    },
-    {
-      "timestamp": "2025-01-04T01:30:30.312825",
-      "cpu_percent": 49.54,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.49838638305664,
-      "gpu_memory_used": 1248.0,
-      "relative_time": 3.42720627784729
-    },
-    {
-      "timestamp": "2025-01-04T01:30:31.421201",
-      "cpu_percent": 47.16,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.44550323486328,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 4.517812728881836
-    },
-    {
-      "timestamp": "2025-01-04T01:30:32.514913",
-      "cpu_percent": 47.98,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.41952896118164,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 5.647390604019165
-    },
-    {
-      "timestamp": "2025-01-04T01:30:33.649021",
-      "cpu_percent": 48.55,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.400592803955078,
-      "gpu_memory_used": 1249.0,
-      "relative_time": 6.729969263076782
-    },
-    {
-      "timestamp": "2025-01-04T01:30:34.723785",
-      "cpu_percent": 43.88,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.390079498291016,
-      "gpu_memory_used": 1253.0,
-      "relative_time": 7.860571622848511
-    },
-    {
-      "timestamp": "2025-01-04T01:30:35.864707",
-      "cpu_percent": 50.01,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.380477905273438,
-      "gpu_memory_used": 1253.0,
-      "relative_time": 8.9869704246521
-    },
-    {
-      "timestamp": "2025-01-04T01:30:36.982950",
-      "cpu_percent": 49.29,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.41130828857422,
-      "gpu_memory_used": 1255.0,
-      "relative_time": 10.097310066223145
-    },
-    {
-      "timestamp": "2025-01-04T01:30:38.099505",
-      "cpu_percent": 52.99,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.410892486572266,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 11.204046249389648
-    },
-    {
-      "timestamp": "2025-01-04T01:30:39.205066",
-      "cpu_percent": 42.98,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.40534210205078,
-      "gpu_memory_used": 1253.0,
-      "relative_time": 12.306914329528809
-    },
-    {
-      "timestamp": "2025-01-04T01:30:40.305591",
-      "cpu_percent": 47.11,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.40200424194336,
-      "gpu_memory_used": 1253.0,
-      "relative_time": 13.411193370819092
-    },
-    {
-      "timestamp": "2025-01-04T01:30:41.410928",
-      "cpu_percent": 50.09,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.39764404296875,
-      "gpu_memory_used": 1260.0,
-      "relative_time": 14.534100770950317
-    },
-    {
-      "timestamp": "2025-01-04T01:30:42.530654",
-      "cpu_percent": 57.82,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.39893341064453,
-      "gpu_memory_used": 1256.0,
-      "relative_time": 15.66111135482788
-    },
-    {
-      "timestamp": "2025-01-04T01:30:43.666031",
-      "cpu_percent": 52.61,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.37706756591797,
-      "gpu_memory_used": 1256.0,
-      "relative_time": 16.79327368736267
-    },
-    {
-      "timestamp": "2025-01-04T01:30:44.794904",
-      "cpu_percent": 57.14,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.36868667602539,
-      "gpu_memory_used": 1256.0,
-      "relative_time": 17.861677646636963
-    },
-    {
-      "timestamp": "2025-01-04T01:30:45.865891",
-      "cpu_percent": 66.7,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.371902465820312,
-      "gpu_memory_used": 1257.0,
-      "relative_time": 18.96451497077942
-    },
-    {
-      "timestamp": "2025-01-04T01:30:46.971206",
-      "cpu_percent": 53.61,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.352508544921875,
-      "gpu_memory_used": 1254.0,
-      "relative_time": 20.086195945739746
-    },
-    {
-      "timestamp": "2025-01-04T01:30:48.089632",
-      "cpu_percent": 50.26,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.349388122558594,
-      "gpu_memory_used": 1248.0,
-      "relative_time": 21.199003219604492
-    },
-    {
-      "timestamp": "2025-01-04T01:30:49.191842",
-      "cpu_percent": 48.22,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.344642639160156,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 22.322958946228027
-    },
-    {
-      "timestamp": "2025-01-04T01:30:50.324994",
-      "cpu_percent": 55.64,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.35323715209961,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 23.469967365264893
-    },
-    {
-      "timestamp": "2025-01-04T01:30:51.477231",
-      "cpu_percent": 45.68,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.35232162475586,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 24.579415798187256
-    },
-    {
-      "timestamp": "2025-01-04T01:30:52.585934",
-      "cpu_percent": 46.07,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.32147216796875,
-      "gpu_memory_used": 1244.0,
-      "relative_time": 25.71301007270813
-    },
-    {
-      "timestamp": "2025-01-04T01:30:53.707821",
-      "cpu_percent": 47.54,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.296611785888672,
-      "gpu_memory_used": 1244.0,
-      "relative_time": 26.7750301361084
-    },
-    {
-      "timestamp": "2025-01-04T01:30:54.766880",
-      "cpu_percent": 44.8,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.28769302368164,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 27.87526297569275
-    },
-    {
-      "timestamp": "2025-01-04T01:30:55.873403",
-      "cpu_percent": 48.82,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.285594940185547,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 29.00292205810547
-    },
-    {
-      "timestamp": "2025-01-04T01:30:57.003386",
-      "cpu_percent": 55.54,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.30721664428711,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 30.13248038291931
-    },
-    {
-      "timestamp": "2025-01-04T01:30:58.135723",
-      "cpu_percent": 46.97,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.319698333740234,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 31.280652046203613
-    },
-    {
-      "timestamp": "2025-01-04T01:30:59.274397",
-      "cpu_percent": 46.94,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.31420135498047,
-      "gpu_memory_used": 1239.0,
-      "relative_time": 32.39983797073364
-    },
-    {
-      "timestamp": "2025-01-04T01:31:00.405545",
-      "cpu_percent": 53.81,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.335922241210938,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 33.502938985824585
-    },
-    {
-      "timestamp": "2025-01-04T01:31:01.497496",
-      "cpu_percent": 51.0,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.325199127197266,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 34.584938526153564
-    },
-    {
-      "timestamp": "2025-01-04T01:31:02.583134",
-      "cpu_percent": 49.26,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.30097198486328,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 35.680947065353394
-    },
-    {
-      "timestamp": "2025-01-04T01:31:03.686381",
-      "cpu_percent": 48.91,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.300418853759766,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 36.786722898483276
-    },
-    {
-      "timestamp": "2025-01-04T01:31:04.786497",
-      "cpu_percent": 48.69,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.29620361328125,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 37.90794491767883
-    },
-    {
-      "timestamp": "2025-01-04T01:31:05.908563",
-      "cpu_percent": 50.43,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.29269027709961,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 39.01517176628113
-    },
-    {
-      "timestamp": "2025-01-04T01:31:07.014496",
-      "cpu_percent": 48.22,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.298015594482422,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 40.118446826934814
-    },
-    {
-      "timestamp": "2025-01-04T01:31:08.120066",
-      "cpu_percent": 47.47,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.312705993652344,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 41.22802424430847
-    },
-    {
-      "timestamp": "2025-01-04T01:31:09.225367",
-      "cpu_percent": 41.09,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.34886932373047,
-      "gpu_memory_used": 1244.0,
-      "relative_time": 42.34174656867981
-    },
-    {
-      "timestamp": "2025-01-04T01:31:10.339308",
-      "cpu_percent": 44.12,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.353790283203125,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 43.44456744194031
-    },
-    {
-      "timestamp": "2025-01-04T01:31:11.443944",
-      "cpu_percent": 48.99,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.34658432006836,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 44.53658318519592
-    },
-    {
-      "timestamp": "2025-01-04T01:31:12.533026",
-      "cpu_percent": 47.62,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.318241119384766,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 45.6171441078186
-    },
-    {
-      "timestamp": "2025-01-04T01:31:13.617044",
-      "cpu_percent": 49.3,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.318588256835938,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 46.71653604507446
-    },
-    {
-      "timestamp": "2025-01-04T01:31:14.718976",
-      "cpu_percent": 48.42,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.316349029541016,
-      "gpu_memory_used": 1239.0,
-      "relative_time": 47.80844783782959
-    },
-    {
-      "timestamp": "2025-01-04T01:31:15.805079",
-      "cpu_percent": 47.56,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.30520248413086,
-      "gpu_memory_used": 1239.0,
-      "relative_time": 48.90499949455261
-    },
-    {
-      "timestamp": "2025-01-04T01:31:16.902878",
-      "cpu_percent": 49.11,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.306812286376953,
-      "gpu_memory_used": 1232.0,
-      "relative_time": 50.034260749816895
-    },
-    {
-      "timestamp": "2025-01-04T01:31:18.035723",
-      "cpu_percent": 45.81,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.32524871826172,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 51.1371693611145
-    },
-    {
-      "timestamp": "2025-01-04T01:31:19.143169",
-      "cpu_percent": 49.94,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.323795318603516,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 52.227344274520874
-    },
-    {
-      "timestamp": "2025-01-04T01:31:20.230256",
-      "cpu_percent": 39.57,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.330493927001953,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 53.34033155441284
-    },
-    {
-      "timestamp": "2025-01-04T01:31:21.331797",
-      "cpu_percent": 44.34,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.330425262451172,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 54.45246958732605
-    },
-    {
-      "timestamp": "2025-01-04T01:31:22.450663",
-      "cpu_percent": 46.87,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.3084716796875,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 55.55728077888489
-    },
-    {
-      "timestamp": "2025-01-04T01:31:23.550691",
-      "cpu_percent": 49.88,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.309173583984375,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 56.65515089035034
-    },
-    {
-      "timestamp": "2025-01-04T01:31:24.650939",
-      "cpu_percent": 51.21,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.30620574951172,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 57.726617097854614
-    },
-    {
-      "timestamp": "2025-01-04T01:31:25.728955",
-      "cpu_percent": 45.22,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.291912078857422,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 58.82792663574219
-    },
-    {
-      "timestamp": "2025-01-04T01:31:26.829490",
-      "cpu_percent": 48.86,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.289695739746094,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 59.93786025047302
-    },
-    {
-      "timestamp": "2025-01-04T01:31:27.937071",
-      "cpu_percent": 45.69,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.302818298339844,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 61.05047869682312
-    },
-    {
-      "timestamp": "2025-01-04T01:31:29.044046",
-      "cpu_percent": 51.09,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.307464599609375,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 62.159112215042114
-    },
-    {
-      "timestamp": "2025-01-04T01:31:30.162426",
-      "cpu_percent": 47.04,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.32668685913086,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 63.249592542648315
-    },
-    {
-      "timestamp": "2025-01-04T01:31:31.251755",
-      "cpu_percent": 45.32,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.330463409423828,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 64.35896062850952
-    },
-    {
-      "timestamp": "2025-01-04T01:31:32.362284",
-      "cpu_percent": 47.2,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.314319610595703,
-      "gpu_memory_used": 1239.0,
-      "relative_time": 65.4672338962555
-    },
-    {
-      "timestamp": "2025-01-04T01:31:33.468921",
-      "cpu_percent": 48.94,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.308246612548828,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 66.5955581665039
-    },
-    {
-      "timestamp": "2025-01-04T01:31:34.594176",
-      "cpu_percent": 47.88,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.29806137084961,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 67.68029594421387
-    },
-    {
-      "timestamp": "2025-01-04T01:31:35.682260",
-      "cpu_percent": 45.92,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.299114227294922,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 68.7970290184021
-    },
-    {
-      "timestamp": "2025-01-04T01:31:36.802433",
-      "cpu_percent": 51.07,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.29195785522461,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 69.92168736457825
-    },
-    {
-      "timestamp": "2025-01-04T01:31:37.926464",
-      "cpu_percent": 47.29,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.324363708496094,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 71.05467820167542
-    },
-    {
-      "timestamp": "2025-01-04T01:31:39.059936",
-      "cpu_percent": 48.91,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.32428741455078,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 72.14405465126038
-    },
-    {
-      "timestamp": "2025-01-04T01:31:40.142859",
-      "cpu_percent": 44.66,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.33354949951172,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 73.25559496879578
-    },
-    {
-      "timestamp": "2025-01-04T01:31:41.254868",
-      "cpu_percent": 48.98,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.344337463378906,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 74.35676956176758
-    },
-    {
-      "timestamp": "2025-01-04T01:31:42.354977",
-      "cpu_percent": 50.79,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.322650909423828,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 75.43929266929626
-    },
-    {
-      "timestamp": "2025-01-04T01:31:43.432869",
-      "cpu_percent": 45.86,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.316268920898438,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 76.53794598579407
-    },
-    {
-      "timestamp": "2025-01-04T01:31:44.535917",
-      "cpu_percent": 47.22,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.308757781982422,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 77.6620762348175
-    },
-    {
-      "timestamp": "2025-01-04T01:31:45.666281",
-      "cpu_percent": 51.06,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.307342529296875,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 78.77155900001526
-    },
-    {
-      "timestamp": "2025-01-04T01:31:46.771605",
-      "cpu_percent": 47.82,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.298141479492188,
-      "gpu_memory_used": 1237.0,
-      "relative_time": 79.87201809883118
-    },
-    {
-      "timestamp": "2025-01-04T01:31:47.874817",
-      "cpu_percent": 44.51,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.322750091552734,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 80.97521829605103
-    },
-    {
-      "timestamp": "2025-01-04T01:31:48.983338",
-      "cpu_percent": 47.69,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.3226318359375,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 82.09707593917847
-    },
-    {
-      "timestamp": "2025-01-04T01:31:50.102541",
-      "cpu_percent": 42.36,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.32965087890625,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 83.20944809913635
-    },
-    {
-      "timestamp": "2025-01-04T01:31:51.204766",
-      "cpu_percent": 45.87,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.32353973388672,
-      "gpu_memory_used": 1243.0,
-      "relative_time": 84.31531429290771
-    },
-    {
-      "timestamp": "2025-01-04T01:31:52.310873",
-      "cpu_percent": 50.01,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.296016693115234,
-      "gpu_memory_used": 1247.0,
-      "relative_time": 85.4254515171051
-    },
-    {
-      "timestamp": "2025-01-04T01:31:53.429342",
-      "cpu_percent": 49.65,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.306453704833984,
-      "gpu_memory_used": 1246.0,
-      "relative_time": 86.51991653442383
-    },
-    {
-      "timestamp": "2025-01-04T01:31:54.517894",
-      "cpu_percent": 47.29,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.30263900756836,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 87.60364723205566
-    },
-    {
-      "timestamp": "2025-01-04T01:31:55.602848",
-      "cpu_percent": 47.48,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.303203582763672,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 88.68531346321106
-    },
-    {
-      "timestamp": "2025-01-04T01:31:56.677895",
-      "cpu_percent": 46.74,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.29749298095703,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 89.78639531135559
-    },
-    {
-      "timestamp": "2025-01-04T01:31:57.794084",
-      "cpu_percent": 43.92,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.313438415527344,
-      "gpu_memory_used": 1244.0,
-      "relative_time": 90.89922308921814
-    },
-    {
-      "timestamp": "2025-01-04T01:31:58.901464",
-      "cpu_percent": 48.88,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.32254409790039,
-      "gpu_memory_used": 1244.0,
-      "relative_time": 91.96823143959045
-    },
-    {
-      "timestamp": "2025-01-04T01:31:59.972227",
-      "cpu_percent": 38.89,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.32897186279297,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 93.08689904212952
-    },
-    {
-      "timestamp": "2025-01-04T01:32:01.089013",
-      "cpu_percent": 49.22,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.328304290771484,
-      "gpu_memory_used": 1250.0,
-      "relative_time": 94.20951867103577
-    },
-    {
-      "timestamp": "2025-01-04T01:32:02.202304",
-      "cpu_percent": 46.56,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.29920196533203,
-      "gpu_memory_used": 1250.0,
-      "relative_time": 95.29210877418518
-    },
-    {
-      "timestamp": "2025-01-04T01:32:03.292108",
-      "cpu_percent": 46.39,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.308143615722656,
-      "gpu_memory_used": 1250.0,
-      "relative_time": 96.40629982948303
-    },
-    {
-      "timestamp": "2025-01-04T01:32:04.402400",
-      "cpu_percent": 49.88,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.310047149658203,
-      "gpu_memory_used": 1250.0,
-      "relative_time": 97.51973557472229
-    },
-    {
-      "timestamp": "2025-01-04T01:32:05.513450",
-      "cpu_percent": 53.28,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.30374526977539,
-      "gpu_memory_used": 1249.0,
-      "relative_time": 98.62612318992615
-    },
-    {
-      "timestamp": "2025-01-04T01:32:06.631627",
-      "cpu_percent": 44.65,
-      "ram_percent": 47.6,
-      "ram_used_gb": 30.30333709716797,
-      "gpu_memory_used": 1242.0,
-      "relative_time": 99.73457670211792
-    },
-    {
-      "timestamp": "2025-01-04T01:32:07.736449",
-      "cpu_percent": 50.93,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.33118438720703,
-      "gpu_memory_used": 1242.0,
-      "relative_time": 100.85807871818542
-    },
-    {
-      "timestamp": "2025-01-04T01:32:08.860429",
-      "cpu_percent": 62.71,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.41672134399414,
-      "gpu_memory_used": 1244.0,
-      "relative_time": 102.08941197395325
-    },
-    {
-      "timestamp": "2025-01-04T01:32:10.080974",
-      "cpu_percent": 96.29,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.45757293701172,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 103.18154048919678
-    },
-    {
-      "timestamp": "2025-01-04T01:32:11.187912",
-      "cpu_percent": 49.09,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.445499420166016,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 104.30198311805725
-    },
-    {
-      "timestamp": "2025-01-04T01:32:12.306213",
-      "cpu_percent": 51.15,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.42266845703125,
-      "gpu_memory_used": 1240.0,
-      "relative_time": 105.43745422363281
-    },
-    {
-      "timestamp": "2025-01-04T01:32:13.437791",
-      "cpu_percent": 47.79,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.40296173095703,
-      "gpu_memory_used": 1236.0,
-      "relative_time": 106.55609393119812
-    },
-    {
-      "timestamp": "2025-01-04T01:32:14.548441",
-      "cpu_percent": 39.41,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.406475067138672,
-      "gpu_memory_used": 1244.0,
-      "relative_time": 107.67082047462463
-    },
-    {
-      "timestamp": "2025-01-04T01:32:15.666526",
-      "cpu_percent": 77.07,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.424407958984375,
-      "gpu_memory_used": 1247.0,
-      "relative_time": 108.7851665019989
-    },
-    {
-      "timestamp": "2025-01-04T01:32:16.780793",
-      "cpu_percent": 49.13,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.429065704345703,
-      "gpu_memory_used": 1246.0,
-      "relative_time": 109.88107633590698
-    },
-    {
-      "timestamp": "2025-01-04T01:32:17.879071",
-      "cpu_percent": 82.96,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.428447723388672,
-      "gpu_memory_used": 1281.0,
-      "relative_time": 111.02328372001648
-    },
-    {
-      "timestamp": "2025-01-04T01:32:19.026978",
-      "cpu_percent": 74.64,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.430500030517578,
-      "gpu_memory_used": 1281.0,
-      "relative_time": 112.15347504615784
-    },
-    {
-      "timestamp": "2025-01-04T01:32:20.156784",
-      "cpu_percent": 76.94,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.40774917602539,
-      "gpu_memory_used": 1274.0,
-      "relative_time": 113.31317591667175
-    },
-    {
-      "timestamp": "2025-01-04T01:32:21.310871",
-      "cpu_percent": 69.52,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.429115295410156,
-      "gpu_memory_used": 1273.0,
-      "relative_time": 114.42301273345947
-    },
-    {
-      "timestamp": "2025-01-04T01:32:22.424508",
-      "cpu_percent": 74.47,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.403045654296875,
-      "gpu_memory_used": 1274.0,
-      "relative_time": 115.52539491653442
-    },
-    {
-      "timestamp": "2025-01-04T01:32:23.525673",
-      "cpu_percent": 67.2,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.484474182128906,
-      "gpu_memory_used": 1273.0,
-      "relative_time": 116.61319661140442
-    },
-    {
-      "timestamp": "2025-01-04T01:32:24.613302",
-      "cpu_percent": 57.41,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.35879135131836,
-      "gpu_memory_used": 1270.0,
-      "relative_time": 117.72619676589966
-    },
-    {
-      "timestamp": "2025-01-04T01:32:25.730732",
-      "cpu_percent": 45.97,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.347335815429688,
-      "gpu_memory_used": 1270.0,
-      "relative_time": 118.84320116043091
-    },
-    {
-      "timestamp": "2025-01-04T01:32:26.845420",
-      "cpu_percent": 47.74,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.354007720947266,
-      "gpu_memory_used": 1265.0,
-      "relative_time": 119.96074485778809
-    },
-    {
-      "timestamp": "2025-01-04T01:32:27.964248",
-      "cpu_percent": 60.0,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.3675537109375,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 121.09439873695374
-    },
-    {
-      "timestamp": "2025-01-04T01:32:29.094542",
-      "cpu_percent": 54.46,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.367305755615234,
-      "gpu_memory_used": 1230.0,
-      "relative_time": 122.24102592468262
-    },
-    {
-      "timestamp": "2025-01-04T01:32:30.244200",
-      "cpu_percent": 56.21,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.364959716796875,
-      "gpu_memory_used": 1230.0,
-      "relative_time": 123.34450554847717
-    },
-    {
-      "timestamp": "2025-01-04T01:32:31.346103",
-      "cpu_percent": 40.66,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.420738220214844,
-      "gpu_memory_used": 1235.0,
-      "relative_time": 124.46777892112732
-    },
-    {
-      "timestamp": "2025-01-04T01:32:32.463710",
-      "cpu_percent": 51.66,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.396198272705078,
-      "gpu_memory_used": 1235.0,
-      "relative_time": 125.57916116714478
-    },
-    {
-      "timestamp": "2025-01-04T01:32:33.580811",
-      "cpu_percent": 49.68,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.40151596069336,
-      "gpu_memory_used": 1236.0,
-      "relative_time": 126.6768786907196
-    },
-    {
-      "timestamp": "2025-01-04T01:32:34.668960",
-      "cpu_percent": 49.09,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.380916595458984,
-      "gpu_memory_used": 1236.0,
-      "relative_time": 127.73568296432495
-    },
-    {
-      "timestamp": "2025-01-04T01:32:35.729484",
-      "cpu_percent": 48.53,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.385761260986328,
-      "gpu_memory_used": 1236.0,
-      "relative_time": 128.85891699790955
-    },
-    {
-      "timestamp": "2025-01-04T01:32:36.849812",
-      "cpu_percent": 52.39,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.414752960205078,
-      "gpu_memory_used": 1235.0,
-      "relative_time": 129.9150390625
-    },
-    {
-      "timestamp": "2025-01-04T01:32:37.919974",
-      "cpu_percent": 46.89,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.450218200683594,
-      "gpu_memory_used": 1235.0,
-      "relative_time": 131.00502228736877
-    },
-    {
-      "timestamp": "2025-01-04T01:32:39.008115",
-      "cpu_percent": 46.59,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.436458587646484,
-      "gpu_memory_used": 1235.0,
-      "relative_time": 132.10191130638123
-    },
-    {
-      "timestamp": "2025-01-04T01:32:40.095463",
-      "cpu_percent": 45.76,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.443893432617188,
-      "gpu_memory_used": 1224.0,
-      "relative_time": 133.26839780807495
-    },
-    {
-      "timestamp": "2025-01-04T01:32:41.265737",
-      "cpu_percent": 56.94,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.41216278076172,
-      "gpu_memory_used": 1224.0,
-      "relative_time": 134.32926607131958
-    },
-    {
-      "timestamp": "2025-01-04T01:32:42.321015",
-      "cpu_percent": 40.36,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.386669158935547,
-      "gpu_memory_used": 1224.0,
-      "relative_time": 135.40537309646606
-    },
-    {
-      "timestamp": "2025-01-04T01:32:43.400382",
-      "cpu_percent": 44.51,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.39049530029297,
-      "gpu_memory_used": 1224.0,
-      "relative_time": 136.52469301223755
-    },
-    {
-      "timestamp": "2025-01-04T01:32:44.524119",
-      "cpu_percent": 50.29,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.38903045654297,
-      "gpu_memory_used": 1220.0,
-      "relative_time": 137.60522270202637
-    },
-    {
-      "timestamp": "2025-01-04T01:32:45.599869",
-      "cpu_percent": 51.69,
-      "ram_percent": 47.8,
-      "ram_used_gb": 30.378681182861328,
-      "gpu_memory_used": 1213.0,
-      "relative_time": 138.7130560874939
-    },
-    {
-      "timestamp": "2025-01-04T01:32:46.711674",
-      "cpu_percent": 49.55,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.34076690673828,
-      "gpu_memory_used": 1213.0,
-      "relative_time": 139.8105547428131
-    },
-    {
-      "timestamp": "2025-01-04T01:32:47.813091",
-      "cpu_percent": 44.5,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.343746185302734,
-      "gpu_memory_used": 1213.0,
-      "relative_time": 140.91643166542053
-    },
-    {
-      "timestamp": "2025-01-04T01:32:48.917679",
-      "cpu_percent": 43.76,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.354793548583984,
-      "gpu_memory_used": 1213.0,
-      "relative_time": 142.04264283180237
-    },
-    {
-      "timestamp": "2025-01-04T01:32:50.047653",
-      "cpu_percent": 48.41,
-      "ram_percent": 47.7,
-      "ram_used_gb": 30.361080169677734,
-      "gpu_memory_used": 1219.0,
-      "relative_time": 143.14667677879333
-    },
-    {
-      "timestamp": "2025-01-04T01:32:51.153490",
-      "cpu_percent": 57.01,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.4390869140625,
-      "gpu_memory_used": 1232.0,
-      "relative_time": 144.2709481716156
-    },
-    {
-      "timestamp": "2025-01-04T01:32:52.272196",
-      "cpu_percent": 54.69,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.46664047241211,
-      "gpu_memory_used": 1236.0,
-      "relative_time": 145.36933588981628
-    },
-    {
-      "timestamp": "2025-01-04T01:32:53.374563",
-      "cpu_percent": 51.37,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.487388610839844,
-      "gpu_memory_used": 1245.0,
-      "relative_time": 146.4400930404663
-    },
-    {
-      "timestamp": "2025-01-04T01:32:54.445178",
-      "cpu_percent": 47.76,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.475635528564453,
-      "gpu_memory_used": 1241.0,
-      "relative_time": 147.5295627117157
-    },
-    {
-      "timestamp": "2025-01-04T01:32:55.520495",
-      "cpu_percent": 49.24,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.47634506225586,
-      "gpu_memory_used": 1236.0,
-      "relative_time": 148.5926468372345
-    },
-    {
-      "timestamp": "2025-01-04T01:32:56.591995",
-      "cpu_percent": 53.63,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.49687957763672,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 149.72928547859192
-    },
-    {
-      "timestamp": "2025-01-04T01:32:57.727346",
-      "cpu_percent": 65.04,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.59111785888672,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 150.86237502098083
-    },
-    {
-      "timestamp": "2025-01-04T01:32:58.862812",
-      "cpu_percent": 71.05,
-      "ram_percent": 48.2,
-      "ram_used_gb": 30.633731842041016,
-      "gpu_memory_used": 1263.0,
-      "relative_time": 152.03348207473755
-    },
-    {
-      "timestamp": "2025-01-04T01:33:00.037915",
-      "cpu_percent": 85.87,
-      "ram_percent": 48.2,
-      "ram_used_gb": 30.68001937866211,
-      "gpu_memory_used": 1253.0,
-      "relative_time": 153.1551034450531
-    },
-    {
-      "timestamp": "2025-01-04T01:33:01.158119",
-      "cpu_percent": 59.8,
-      "ram_percent": 48.2,
-      "ram_used_gb": 30.69198989868164,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 154.2606840133667
-    },
-    {
-      "timestamp": "2025-01-04T01:33:02.262390",
-      "cpu_percent": 45.33,
-      "ram_percent": 48.3,
-      "ram_used_gb": 30.743839263916016,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 155.3663365840912
-    },
-    {
-      "timestamp": "2025-01-04T01:33:03.369936",
-      "cpu_percent": 35.41,
-      "ram_percent": 48.2,
-      "ram_used_gb": 30.68472671508789,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 156.4842345714569
-    },
-    {
-      "timestamp": "2025-01-04T01:33:04.488089",
-      "cpu_percent": 47.22,
-      "ram_percent": 48.4,
-      "ram_used_gb": 30.78485870361328,
-      "gpu_memory_used": 1254.0,
-      "relative_time": 157.58868670463562
-    },
-    {
-      "timestamp": "2025-01-04T01:33:05.592303",
-      "cpu_percent": 36.14,
-      "ram_percent": 48.5,
-      "ram_used_gb": 30.87320327758789,
-      "gpu_memory_used": 1254.0,
-      "relative_time": 158.71629786491394
-    },
-    {
-      "timestamp": "2025-01-04T01:33:06.721317",
-      "cpu_percent": 38.46,
-      "ram_percent": 48.2,
-      "ram_used_gb": 30.668170928955078,
-      "gpu_memory_used": 1254.0,
-      "relative_time": 159.82655477523804
-    },
-    {
-      "timestamp": "2025-01-04T01:33:07.827187",
-      "cpu_percent": 35.81,
-      "ram_percent": 48.4,
-      "ram_used_gb": 30.777912139892578,
-      "gpu_memory_used": 1254.0,
-      "relative_time": 160.94229197502136
-    },
-    {
-      "timestamp": "2025-01-04T01:33:08.943035",
-      "cpu_percent": 39.24,
-      "ram_percent": 48.5,
-      "ram_used_gb": 30.86941146850586,
-      "gpu_memory_used": 1254.0,
-      "relative_time": 162.06378889083862
-    },
-    {
-      "timestamp": "2025-01-04T01:33:10.063208",
-      "cpu_percent": 51.52,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.624229431152344,
-      "gpu_memory_used": 1254.0,
-      "relative_time": 163.16198420524597
-    },
-    {
-      "timestamp": "2025-01-04T01:33:11.163067",
-      "cpu_percent": 48.99,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.612281799316406,
-      "gpu_memory_used": 1254.0,
-      "relative_time": 164.26579809188843
-    },
-    {
-      "timestamp": "2025-01-04T01:33:12.266417",
-      "cpu_percent": 46.27,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.584861755371094,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 165.35981583595276
-    },
-    {
-      "timestamp": "2025-01-04T01:33:13.354673",
-      "cpu_percent": 45.71,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.582279205322266,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 166.45263361930847
-    },
-    {
-      "timestamp": "2025-01-04T01:33:14.447308",
-      "cpu_percent": 48.69,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.584793090820312,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 167.54857754707336
-    },
-    {
-      "timestamp": "2025-01-04T01:33:15.552042",
-      "cpu_percent": 48.66,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.580883026123047,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 168.659592628479
-    },
-    {
-      "timestamp": "2025-01-04T01:33:16.653015",
-      "cpu_percent": 50.37,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.573726654052734,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 169.7969992160797
-    },
-    {
-      "timestamp": "2025-01-04T01:33:17.802854",
-      "cpu_percent": 49.45,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.587318420410156,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 170.891606092453
-    },
-    {
-      "timestamp": "2025-01-04T01:33:18.893192",
-      "cpu_percent": 50.16,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.5953369140625,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 172.0133557319641
-    },
-    {
-      "timestamp": "2025-01-04T01:33:20.008593",
-      "cpu_percent": 47.57,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.6124267578125,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 173.0913679599762
-    },
-    {
-      "timestamp": "2025-01-04T01:33:21.097576",
-      "cpu_percent": 44.32,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.584686279296875,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 174.20030999183655
-    },
-    {
-      "timestamp": "2025-01-04T01:33:22.201335",
-      "cpu_percent": 49.01,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.547630310058594,
-      "gpu_memory_used": 1252.0,
-      "relative_time": 175.30235862731934
-    },
-    {
-      "timestamp": "2025-01-04T01:33:23.306131",
-      "cpu_percent": 43.7,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.559757232666016,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 176.40550017356873
-    },
-    {
-      "timestamp": "2025-01-04T01:33:24.408896",
-      "cpu_percent": 48.77,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.5601806640625,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 177.4984576702118
-    },
-    {
-      "timestamp": "2025-01-04T01:33:25.496705",
-      "cpu_percent": 50.56,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.556926727294922,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 178.58782863616943
-    },
-    {
-      "timestamp": "2025-01-04T01:33:26.588438",
-      "cpu_percent": 47.76,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.53600311279297,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 179.67969870567322
-    },
-    {
-      "timestamp": "2025-01-04T01:33:27.679807",
-      "cpu_percent": 49.0,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.540546417236328,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 180.78388810157776
-    },
-    {
-      "timestamp": "2025-01-04T01:33:28.780263",
-      "cpu_percent": 49.25,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.55233383178711,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 181.88185930252075
-    },
-    {
-      "timestamp": "2025-01-04T01:33:29.881869",
-      "cpu_percent": 47.08,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.56603240966797,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 182.9666450023651
-    },
-    {
-      "timestamp": "2025-01-04T01:33:30.957821",
-      "cpu_percent": 45.77,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.559410095214844,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 184.05338644981384
-    },
-    {
-      "timestamp": "2025-01-04T01:33:32.047377",
-      "cpu_percent": 50.79,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.534175872802734,
-      "gpu_memory_used": 1251.0,
-      "relative_time": 185.17484974861145
-    },
-    {
-      "timestamp": "2025-01-04T01:33:33.167413",
-      "cpu_percent": 52.13,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.54046630859375,
-      "gpu_memory_used": 1266.0,
-      "relative_time": 186.23550605773926
-    },
-    {
-      "timestamp": "2025-01-04T01:33:34.226743",
-      "cpu_percent": 43.81,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.54621124267578,
-      "gpu_memory_used": 1266.0,
-      "relative_time": 187.30887961387634
-    },
-    {
-      "timestamp": "2025-01-04T01:33:35.303398",
-      "cpu_percent": 49.28,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.545230865478516,
-      "gpu_memory_used": 1264.0,
-      "relative_time": 188.40410709381104
-    },
-    {
-      "timestamp": "2025-01-04T01:33:36.405660",
-      "cpu_percent": 46.44,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.540679931640625,
-      "gpu_memory_used": 1264.0,
-      "relative_time": 189.47515082359314
-    },
-    {
-      "timestamp": "2025-01-04T01:33:37.469955",
-      "cpu_percent": 41.6,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.562320709228516,
-      "gpu_memory_used": 1264.0,
-      "relative_time": 190.56309294700623
-    },
-    {
-      "timestamp": "2025-01-04T01:33:38.556728",
-      "cpu_percent": 50.52,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.561084747314453,
-      "gpu_memory_used": 1264.0,
-      "relative_time": 191.66572499275208
-    },
-    {
-      "timestamp": "2025-01-04T01:33:39.665385",
-      "cpu_percent": 40.93,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.577682495117188,
-      "gpu_memory_used": 1264.0,
-      "relative_time": 192.76011109352112
-    },
-    {
-      "timestamp": "2025-01-04T01:33:40.754482",
-      "cpu_percent": 50.46,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.5740966796875,
-      "gpu_memory_used": 1262.0,
-      "relative_time": 193.90924453735352
-    },
-    {
-      "timestamp": "2025-01-04T01:33:41.903437",
-      "cpu_percent": 52.75,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.58869171142578,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 195.0148274898529
-    },
-    {
-      "timestamp": "2025-01-04T01:33:43.008520",
-      "cpu_percent": 50.04,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.560386657714844,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 196.12349009513855
-    },
-    {
-      "timestamp": "2025-01-04T01:33:44.129194",
-      "cpu_percent": 51.56,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.572277069091797,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 197.20997285842896
-    },
-    {
-      "timestamp": "2025-01-04T01:33:45.212927",
-      "cpu_percent": 47.77,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.556873321533203,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 198.29724264144897
-    },
-    {
-      "timestamp": "2025-01-04T01:33:46.288883",
-      "cpu_percent": 46.07,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.554439544677734,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 199.39549779891968
-    },
-    {
-      "timestamp": "2025-01-04T01:33:47.403171",
-      "cpu_percent": 46.18,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.557025909423828,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 200.50221276283264
-    },
-    {
-      "timestamp": "2025-01-04T01:33:48.495515",
-      "cpu_percent": 48.09,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.558856964111328,
-      "gpu_memory_used": 1267.0,
-      "relative_time": 201.62405467033386
-    },
-    {
-      "timestamp": "2025-01-04T01:33:49.630725",
-      "cpu_percent": 53.47,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.59896469116211,
-      "gpu_memory_used": 1283.0,
-      "relative_time": 202.70162987709045
-    },
-    {
-      "timestamp": "2025-01-04T01:33:50.709226",
-      "cpu_percent": 44.74,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.581470489501953,
-      "gpu_memory_used": 1281.0,
-      "relative_time": 203.78962469100952
-    },
-    {
-      "timestamp": "2025-01-04T01:33:51.782302",
-      "cpu_percent": 43.4,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.582977294921875,
-      "gpu_memory_used": 1282.0,
-      "relative_time": 204.87054562568665
-    },
-    {
-      "timestamp": "2025-01-04T01:33:52.868020",
-      "cpu_percent": 51.75,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.540206909179688,
-      "gpu_memory_used": 1282.0,
-      "relative_time": 205.95602416992188
-    },
-    {
-      "timestamp": "2025-01-04T01:33:53.956023",
-      "cpu_percent": 46.36,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.562763214111328,
-      "gpu_memory_used": 1282.0,
-      "relative_time": 207.06639337539673
-    },
-    {
-      "timestamp": "2025-01-04T01:33:55.064043",
-      "cpu_percent": 43.91,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.560302734375,
-      "gpu_memory_used": 1277.0,
-      "relative_time": 208.16699743270874
-    },
-    {
-      "timestamp": "2025-01-04T01:33:56.170674",
-      "cpu_percent": 50.01,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.576671600341797,
-      "gpu_memory_used": 1281.0,
-      "relative_time": 209.28660559654236
-    },
-    {
-      "timestamp": "2025-01-04T01:33:57.288316",
-      "cpu_percent": 50.51,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.551471710205078,
-      "gpu_memory_used": 1280.0,
-      "relative_time": 210.4030442237854
-    },
-    {
-      "timestamp": "2025-01-04T01:33:58.407032",
-      "cpu_percent": 49.43,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.576725006103516,
-      "gpu_memory_used": 1280.0,
-      "relative_time": 211.50494027137756
-    },
-    {
-      "timestamp": "2025-01-04T01:33:59.497806",
-      "cpu_percent": 46.68,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.59314727783203,
-      "gpu_memory_used": 1279.0,
-      "relative_time": 212.6002950668335
-    },
-    {
-      "timestamp": "2025-01-04T01:34:00.598484",
-      "cpu_percent": 57.44,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.60983657836914,
-      "gpu_memory_used": 1285.0,
-      "relative_time": 213.7150914669037
-    },
-    {
-      "timestamp": "2025-01-04T01:34:01.719968",
-      "cpu_percent": 54.58,
-      "ram_percent": 48.1,
-      "ram_used_gb": 30.586456298828125,
-      "gpu_memory_used": 1283.0,
-      "relative_time": 214.80932760238647
-    },
-    {
-      "timestamp": "2025-01-04T01:34:02.807573",
-      "cpu_percent": 61.69,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.53356170654297,
-      "gpu_memory_used": 1281.0,
-      "relative_time": 215.88946890830994
-    },
-    {
-      "timestamp": "2025-01-04T01:34:03.885672",
-      "cpu_percent": 49.46,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.517364501953125,
-      "gpu_memory_used": 1283.0,
-      "relative_time": 216.97114062309265
-    },
-    {
-      "timestamp": "2025-01-04T01:34:04.974449",
-      "cpu_percent": 42.69,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.527969360351562,
-      "gpu_memory_used": 1285.0,
-      "relative_time": 218.10192775726318
-    },
-    {
-      "timestamp": "2025-01-04T01:34:06.107947",
-      "cpu_percent": 54.87,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.51028823852539,
-      "gpu_memory_used": 1273.0,
-      "relative_time": 219.17600679397583
-    },
-    {
-      "timestamp": "2025-01-04T01:34:07.172153",
-      "cpu_percent": 45.42,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.509258270263672,
-      "gpu_memory_used": 1273.0,
-      "relative_time": 220.28902983665466
-    },
-    {
-      "timestamp": "2025-01-04T01:34:08.289623",
-      "cpu_percent": 52.75,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.52011489868164,
-      "gpu_memory_used": 1272.0,
-      "relative_time": 221.39960098266602
-    },
-    {
-      "timestamp": "2025-01-04T01:34:09.406158",
-      "cpu_percent": 52.53,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.52783966064453,
-      "gpu_memory_used": 1265.0,
-      "relative_time": 222.49749565124512
-    },
-    {
-      "timestamp": "2025-01-04T01:34:10.491042",
-      "cpu_percent": 56.49,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.540733337402344,
-      "gpu_memory_used": 1261.0,
-      "relative_time": 223.5777132511139
-    },
-    {
-      "timestamp": "2025-01-04T01:34:11.577710",
-      "cpu_percent": 44.25,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.531757354736328,
-      "gpu_memory_used": 1262.0,
-      "relative_time": 224.68288159370422
-    },
-    {
-      "timestamp": "2025-01-04T01:34:12.682455",
-      "cpu_percent": 47.56,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.50157928466797,
-      "gpu_memory_used": 1262.0,
-      "relative_time": 225.78287291526794
-    },
-    {
-      "timestamp": "2025-01-04T01:34:13.782976",
-      "cpu_percent": 48.52,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.507736206054688,
-      "gpu_memory_used": 1262.0,
-      "relative_time": 226.8910207748413
-    },
-    {
-      "timestamp": "2025-01-04T01:34:14.884200",
-      "cpu_percent": 49.89,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.50653076171875,
-      "gpu_memory_used": 1263.0,
-      "relative_time": 228.04418087005615
-    },
-    {
-      "timestamp": "2025-01-04T01:34:16.051189",
-      "cpu_percent": 49.34,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.504470825195312,
-      "gpu_memory_used": 1263.0,
-      "relative_time": 229.13680815696716
-    },
-    {
-      "timestamp": "2025-01-04T01:34:17.136588",
-      "cpu_percent": 47.8,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.496841430664062,
-      "gpu_memory_used": 1263.0,
-      "relative_time": 230.26778984069824
-    },
-    {
-      "timestamp": "2025-01-04T01:34:18.269616",
-      "cpu_percent": 48.23,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.50909423828125,
-      "gpu_memory_used": 1262.0,
-      "relative_time": 231.3880865573883
-    },
-    {
-      "timestamp": "2025-01-04T01:34:19.387759",
-      "cpu_percent": 42.46,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.525142669677734,
-      "gpu_memory_used": 1262.0,
-      "relative_time": 232.4770486354828
-    },
-    {
-      "timestamp": "2025-01-04T01:34:20.471629",
-      "cpu_percent": 44.17,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.535388946533203,
-      "gpu_memory_used": 1259.0,
-      "relative_time": 233.57954168319702
-    },
-    {
-      "timestamp": "2025-01-04T01:34:21.576615",
-      "cpu_percent": 45.36,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.529708862304688,
-      "gpu_memory_used": 1259.0,
-      "relative_time": 234.70528435707092
-    },
-    {
-      "timestamp": "2025-01-04T01:34:22.709825",
-      "cpu_percent": 52.14,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.490406036376953,
-      "gpu_memory_used": 1259.0,
-      "relative_time": 235.84367108345032
-    },
-    {
-      "timestamp": "2025-01-04T01:34:23.834912",
-      "cpu_percent": 49.39,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.49042510986328,
-      "gpu_memory_used": 1259.0,
-      "relative_time": 236.94777131080627
-    },
-    {
-      "timestamp": "2025-01-04T01:34:24.940884",
-      "cpu_percent": 51.84,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.489459991455078,
-      "gpu_memory_used": 1259.0,
-      "relative_time": 238.07107305526733
-    },
-    {
-      "timestamp": "2025-01-04T01:34:26.077527",
-      "cpu_percent": 49.55,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.488842010498047,
-      "gpu_memory_used": 1259.0,
-      "relative_time": 239.20314645767212
-    },
-    {
-      "timestamp": "2025-01-04T01:34:27.199360",
-      "cpu_percent": 47.71,
-      "ram_percent": 47.9,
-      "ram_used_gb": 30.49380874633789,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 240.32860612869263
-    },
-    {
-      "timestamp": "2025-01-04T01:34:28.333600",
-      "cpu_percent": 48.61,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.503887176513672,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 241.44983053207397
-    },
-    {
-      "timestamp": "2025-01-04T01:34:29.453855",
-      "cpu_percent": 51.01,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.512046813964844,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 242.60703372955322
-    },
-    {
-      "timestamp": "2025-01-04T01:34:30.613699",
-      "cpu_percent": 53.89,
-      "ram_percent": 48.0,
-      "ram_used_gb": 30.522415161132812,
-      "gpu_memory_used": 1258.0,
-      "relative_time": 243.73219799995422
-    },
-    {
-      "timestamp": "2025-01-04T01:34:31.735503",
-      "cpu_percent": 21.25,
-      "ram_percent": 48.2,
-      "ram_used_gb": 30.68771743774414,
-      "gpu_memory_used": 1260.0,
-      "relative_time": 244.80069231987
-    }
-  ],
-  "test_duration": 247.14976453781128
-}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt
deleted file mode 100644
index 541a304..0000000
--- a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-=== Benchmark Statistics (with correct RTF) ===
-
-Total tokens processed: 1800
-Total audio generated (s): 568.53
-Total test duration (s): 244.10
-Average processing rate (tokens/s): 7.34
-Average RTF: 0.43
-Average Real Time Speed: 2.33
-
-=== Per-chunk Stats ===
-
-Average chunk size (tokens): 600.00
-Min chunk size (tokens): 300
-Max chunk size (tokens): 900
-Average processing time (s): 81.30
-Average output length (s): 189.51
-
-=== Performance Ranges ===
-
-Processing rate range (tokens/s): 7.21 - 7.47
-RTF range: 0.43x - 0.43x
-Real Time Speed range: 2.33x - 2.33x
-
diff --git a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark.json b/examples/assorted_checks/benchmarks/output_data/first_token_benchmark.json
deleted file mode 100644
index ae10c23..0000000
--- a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark.json
+++ /dev/null
@@ -1,403 +0,0 @@
-{
-  "individual_runs": [
-    {
-      "text_length": 37,
-      "token_count": 10,
-      "total_time": 0.16574740409851074,
-      "time_to_first_chunk": 0.16574740409851074,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens10_run1.wav",
-      "audio_length": 3.45,
-      "target_tokens": 10,
-      "actual_tokens": 10,
-      "run_number": 1
-    },
-    {
-      "text_length": 37,
-      "token_count": 10,
-      "total_time": 0.18812799453735352,
-      "time_to_first_chunk": 0.18812799453735352,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens10_run2.wav",
-      "audio_length": 3.45,
-      "target_tokens": 10,
-      "actual_tokens": 10,
-      "run_number": 2
-    },
-    {
-      "text_length": 37,
-      "token_count": 10,
-      "total_time": 0.18645429611206055,
-      "time_to_first_chunk": 0.18645429611206055,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens10_run3.wav",
-      "audio_length": 3.45,
-      "target_tokens": 10,
-      "actual_tokens": 10,
-      "run_number": 3
-    },
-    {
-      "text_length": 37,
-      "token_count": 10,
-      "total_time": 0.17632031440734863,
-      "time_to_first_chunk": 0.17632031440734863,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens10_run4.wav",
-      "audio_length": 3.45,
-      "target_tokens": 10,
-      "actual_tokens": 10,
-      "run_number": 4
-    },
-    {
-      "text_length": 37,
-      "token_count": 10,
-      "total_time": 0.13381195068359375,
-      "time_to_first_chunk": 0.13381195068359375,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens10_run5.wav",
-      "audio_length": 3.45,
-      "target_tokens": 10,
-      "actual_tokens": 10,
-      "run_number": 5
-    },
-    {
-      "text_length": 102,
-      "token_count": 25,
-      "total_time": 0.2086498737335205,
-      "time_to_first_chunk": 0.2086498737335205,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens25_run1.wav",
-      "audio_length": 7.225,
-      "target_tokens": 25,
-      "actual_tokens": 25,
-      "run_number": 1
-    },
-    {
-      "text_length": 102,
-      "token_count": 25,
-      "total_time": 0.2727653980255127,
-      "time_to_first_chunk": 0.2727653980255127,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens25_run2.wav",
-      "audio_length": 7.225,
-      "target_tokens": 25,
-      "actual_tokens": 25,
-      "run_number": 2
-    },
-    {
-      "text_length": 102,
-      "token_count": 25,
-      "total_time": 0.2096250057220459,
-      "time_to_first_chunk": 0.2096250057220459,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens25_run3.wav",
-      "audio_length": 7.225,
-      "target_tokens": 25,
-      "actual_tokens": 25,
-      "run_number": 3
-    },
-    {
-      "text_length": 102,
-      "token_count": 25,
-      "total_time": 0.2256758213043213,
-      "time_to_first_chunk": 0.2256758213043213,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens25_run4.wav",
-      "audio_length": 7.225,
-      "target_tokens": 25,
-      "actual_tokens": 25,
-      "run_number": 4
-    },
-    {
-      "text_length": 102,
-      "token_count": 25,
-      "total_time": 0.1945042610168457,
-      "time_to_first_chunk": 0.1945042610168457,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens25_run5.wav",
-      "audio_length": 7.225,
-      "target_tokens": 25,
-      "actual_tokens": 25,
-      "run_number": 5
-    },
-    {
-      "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.4975121021270752,
-      "time_to_first_chunk": 0.4975121021270752,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens50_run1.wav",
-      "audio_length": 16.325,
-      "target_tokens": 50,
-      "actual_tokens": 50,
-      "run_number": 1
-    },
-    {
-      "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.4518404006958008,
-      "time_to_first_chunk": 0.4518404006958008,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens50_run2.wav",
-      "audio_length": 16.325,
-      "target_tokens": 50,
-      "actual_tokens": 50,
-      "run_number": 2
-    },
-    {
-      "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.5640325546264648,
-      "time_to_first_chunk": 0.5640325546264648,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens50_run3.wav",
-      "audio_length": 16.325,
-      "target_tokens": 50,
-      "actual_tokens": 50,
-      "run_number": 3
-    },
-    {
-      "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.5305957794189453,
-      "time_to_first_chunk": 0.5305957794189453,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens50_run4.wav",
-      "audio_length": 16.325,
-      "target_tokens": 50,
-      "actual_tokens": 50,
-      "run_number": 4
-    },
-    {
-      "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.5540030002593994,
-      "time_to_first_chunk": 0.5540030002593994,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens50_run5.wav",
-      "audio_length": 16.325,
-      "target_tokens": 50,
-      "actual_tokens": 50,
-      "run_number": 5
-    },
-    {
-      "text_length": 448,
-      "token_count": 100,
-      "total_time": 0.7963137626647949,
-      "time_to_first_chunk": 0.7963137626647949,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens100_run1.wav",
-      "audio_length": 31.1,
-      "target_tokens": 100,
-      "actual_tokens": 100,
-      "run_number": 1
-    },
-    {
-      "text_length": 448,
-      "token_count": 100,
-      "total_time": 0.9320805072784424,
-      "time_to_first_chunk": 0.9320805072784424,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens100_run2.wav",
-      "audio_length": 31.1,
-      "target_tokens": 100,
-      "actual_tokens": 100,
-      "run_number": 2
-    },
-    {
-      "text_length": 448,
-      "token_count": 100,
-      "total_time": 0.824256181716919,
-      "time_to_first_chunk": 0.824256181716919,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens100_run3.wav",
-      "audio_length": 31.1,
-      "target_tokens": 100,
-      "actual_tokens": 100,
-      "run_number": 3
-    },
-    {
-      "text_length": 448,
-      "token_count": 100,
-      "total_time": 0.9034836292266846,
-      "time_to_first_chunk": 0.9034836292266846,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens100_run4.wav",
-      "audio_length": 31.1,
-      "target_tokens": 100,
-      "actual_tokens": 100,
-      "run_number": 4
-    },
-    {
-      "text_length": 448,
-      "token_count": 100,
-      "total_time": 0.8364357948303223,
-      "time_to_first_chunk": 0.8364357948303223,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens100_run5.wav",
-      "audio_length": 31.1,
-      "target_tokens": 100,
-      "actual_tokens": 100,
-      "run_number": 5
-    },
-    {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.8122682571411133,
-      "time_to_first_chunk": 1.8122682571411133,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens200_run1.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
-      "run_number": 1
-    },
-    {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.7290427684783936,
-      "time_to_first_chunk": 1.7290427684783936,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens200_run2.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
-      "run_number": 2
-    },
-    {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 2.141728401184082,
-      "time_to_first_chunk": 2.141728401184082,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens200_run3.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
-      "run_number": 3
-    },
-    {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 2.0155680179595947,
-      "time_to_first_chunk": 2.0155680179595947,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens200_run4.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
-      "run_number": 4
-    },
-    {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.8707575798034668,
-      "time_to_first_chunk": 1.8707575798034668,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens200_run5.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
-      "run_number": 5
-    },
-    {
-      "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.822713851928711,
-      "time_to_first_chunk": 4.822713851928711,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens500_run1.wav",
-      "audio_length": 157.875,
-      "target_tokens": 500,
-      "actual_tokens": 500,
-      "run_number": 1
-    },
-    {
-      "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.227782726287842,
-      "time_to_first_chunk": 4.227782726287842,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens500_run2.wav",
-      "audio_length": 157.875,
-      "target_tokens": 500,
-      "actual_tokens": 500,
-      "run_number": 2
-    },
-    {
-      "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.414916276931763,
-      "time_to_first_chunk": 4.414916276931763,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens500_run3.wav",
-      "audio_length": 157.875,
-      "target_tokens": 500,
-      "actual_tokens": 500,
-      "run_number": 3
-    },
-    {
-      "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.579505681991577,
-      "time_to_first_chunk": 4.579505681991577,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens500_run4.wav",
-      "audio_length": 157.875,
-      "target_tokens": 500,
-      "actual_tokens": 500,
-      "run_number": 4
-    },
-    {
-      "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.332529067993164,
-      "time_to_first_chunk": 4.332529067993164,
-      "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens500_run5.wav",
-      "audio_length": 157.875,
-      "target_tokens": 500,
-      "actual_tokens": 500,
-      "run_number": 5
-    }
-  ],
-  "summary": {
-    "10": {
-      "avg_time_to_first_chunk": 0.17,
-      "avg_total_time": 0.17,
-      "avg_audio_length": 3.45,
-      "num_successful_runs": 5
-    },
-    "25": {
-      "avg_time_to_first_chunk": 0.222,
-      "avg_total_time": 0.222,
-      "avg_audio_length": 7.225,
-      "num_successful_runs": 5
-    },
-    "50": {
-      "avg_time_to_first_chunk": 0.52,
-      "avg_total_time": 0.52,
-      "avg_audio_length": 16.325,
-      "num_successful_runs": 5
-    },
-    "100": {
-      "avg_time_to_first_chunk": 0.859,
-      "avg_total_time": 0.859,
-      "avg_audio_length": 31.1,
-      "num_successful_runs": 5
-    },
-    "200": {
-      "avg_time_to_first_chunk": 1.914,
-      "avg_total_time": 1.914,
-      "avg_audio_length": 62.625,
-      "num_successful_runs": 5
-    },
-    "500": {
-      "avg_time_to_first_chunk": 4.475,
-      "avg_total_time": 4.475,
-      "avg_audio_length": 157.875,
-      "num_successful_runs": 5
-    }
-  },
-  "timestamp": "2025-01-04 13:52:28"
-}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream.json b/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream.json
index c8bb092..7501fb9 100644
--- a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream.json
+++ b/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream.json
@@ -1,271 +1,337 @@
 {
   "individual_runs": [
     {
-      "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.7278211116790771,
-      "time_to_first_chunk": 0.3613290786743164,
+      "text_length": 37,
+      "token_count": null,
+      "total_time": 0.4376556873321533,
+      "time_to_first_chunk": 0.4189143180847168,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run1_stream.wav",
-      "audio_length": 16.325,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens10_run1_stream.wav",
+      "audio_length": 3.45,
+      "target_tokens": 10,
+      "actual_tokens": 10,
+      "run_number": 1
+    },
+    {
+      "text_length": 37,
+      "token_count": null,
+      "total_time": 0.37163758277893066,
+      "time_to_first_chunk": 0.34892702102661133,
+      "error": null,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens10_run2_stream.wav",
+      "audio_length": 3.45,
+      "target_tokens": 10,
+      "actual_tokens": 10,
+      "run_number": 2
+    },
+    {
+      "text_length": 37,
+      "token_count": null,
+      "total_time": 0.2654602527618408,
+      "time_to_first_chunk": 0.2409076690673828,
+      "error": null,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens10_run3_stream.wav",
+      "audio_length": 3.45,
+      "target_tokens": 10,
+      "actual_tokens": 10,
+      "run_number": 3
+    },
+    {
+      "text_length": 37,
+      "token_count": null,
+      "total_time": 0.24376440048217773,
+      "time_to_first_chunk": 0.23003816604614258,
+      "error": null,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens10_run4_stream.wav",
+      "audio_length": 3.45,
+      "target_tokens": 10,
+      "actual_tokens": 10,
+      "run_number": 4
+    },
+    {
+      "text_length": 37,
+      "token_count": null,
+      "total_time": 0.25968003273010254,
+      "time_to_first_chunk": 0.24081206321716309,
+      "error": null,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens10_run5_stream.wav",
+      "audio_length": 3.45,
+      "target_tokens": 10,
+      "actual_tokens": 10,
+      "run_number": 5
+    },
+    {
+      "text_length": 212,
+      "token_count": null,
+      "total_time": 1.049060344696045,
+      "time_to_first_chunk": 0.3336215019226074,
+      "error": null,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run1_stream.wav",
+      "audio_length": 15.925,
       "target_tokens": 50,
       "actual_tokens": 50,
       "run_number": 1
     },
     {
       "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.4556088447570801,
-      "time_to_first_chunk": 0.18642044067382812,
+      "token_count": null,
+      "total_time": 0.8934676647186279,
+      "time_to_first_chunk": 0.3011031150817871,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run2_stream.wav",
-      "audio_length": 16.325,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run2_stream.wav",
+      "audio_length": 15.925,
       "target_tokens": 50,
       "actual_tokens": 50,
       "run_number": 2
     },
     {
       "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.5538768768310547,
-      "time_to_first_chunk": 0.2720797061920166,
+      "token_count": null,
+      "total_time": 0.9444286823272705,
+      "time_to_first_chunk": 0.3198091983795166,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run3_stream.wav",
-      "audio_length": 16.325,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run3_stream.wav",
+      "audio_length": 15.925,
       "target_tokens": 50,
       "actual_tokens": 50,
       "run_number": 3
     },
     {
       "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.4395604133605957,
-      "time_to_first_chunk": 0.15613913536071777,
+      "token_count": null,
+      "total_time": 0.9735183715820312,
+      "time_to_first_chunk": 0.369948148727417,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run4_stream.wav",
-      "audio_length": 16.325,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run4_stream.wav",
+      "audio_length": 15.925,
       "target_tokens": 50,
       "actual_tokens": 50,
       "run_number": 4
     },
     {
       "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.45748305320739746,
-      "time_to_first_chunk": 0.18805718421936035,
+      "token_count": null,
+      "total_time": 0.8089118003845215,
+      "time_to_first_chunk": 0.30179858207702637,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run5_stream.wav",
-      "audio_length": 16.325,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run5_stream.wav",
+      "audio_length": 15.925,
       "target_tokens": 50,
       "actual_tokens": 50,
       "run_number": 5
     },
     {
       "text_length": 448,
-      "token_count": 100,
-      "total_time": 0.7347762584686279,
-      "time_to_first_chunk": 0.16963744163513184,
+      "token_count": null,
+      "total_time": 1.641003131866455,
+      "time_to_first_chunk": 0.2979745864868164,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run1_stream.wav",
-      "audio_length": 31.1,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run1_stream.wav",
+      "audio_length": 30.5,
       "target_tokens": 100,
       "actual_tokens": 100,
       "run_number": 1
     },
     {
       "text_length": 448,
-      "token_count": 100,
-      "total_time": 0.8288509845733643,
-      "time_to_first_chunk": 0.20123004913330078,
+      "token_count": null,
+      "total_time": 1.3709619045257568,
+      "time_to_first_chunk": 0.4272146224975586,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run2_stream.wav",
-      "audio_length": 31.1,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run2_stream.wav",
+      "audio_length": 30.5,
       "target_tokens": 100,
       "actual_tokens": 100,
       "run_number": 2
     },
     {
       "text_length": 448,
-      "token_count": 100,
-      "total_time": 0.7503848075866699,
-      "time_to_first_chunk": 0.21662068367004395,
+      "token_count": null,
+      "total_time": 1.2554471492767334,
+      "time_to_first_chunk": 0.29790568351745605,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run3_stream.wav",
-      "audio_length": 31.1,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run3_stream.wav",
+      "audio_length": 30.5,
       "target_tokens": 100,
       "actual_tokens": 100,
       "run_number": 3
     },
     {
       "text_length": 448,
-      "token_count": 100,
-      "total_time": 0.694899320602417,
-      "time_to_first_chunk": 0.1966841220855713,
+      "token_count": null,
+      "total_time": 1.3761844635009766,
+      "time_to_first_chunk": 0.32633328437805176,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run4_stream.wav",
-      "audio_length": 31.1,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run4_stream.wav",
+      "audio_length": 30.5,
       "target_tokens": 100,
       "actual_tokens": 100,
       "run_number": 4
     },
     {
       "text_length": 448,
-      "token_count": 100,
-      "total_time": 0.68701171875,
-      "time_to_first_chunk": 0.19341063499450684,
+      "token_count": null,
+      "total_time": 1.56705904006958,
+      "time_to_first_chunk": 0.32801246643066406,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run5_stream.wav",
-      "audio_length": 31.1,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run5_stream.wav",
+      "audio_length": 30.5,
       "target_tokens": 100,
       "actual_tokens": 100,
       "run_number": 5
     },
     {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.6845426559448242,
-      "time_to_first_chunk": 0.21096158027648926,
+      "text_length": 1140,
+      "token_count": null,
+      "total_time": 5.086699962615967,
+      "time_to_first_chunk": 0.33925390243530273,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run1_stream.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens250_run1_stream.wav",
+      "audio_length": 78.775,
+      "target_tokens": 250,
+      "actual_tokens": 250,
       "run_number": 1
     },
     {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.3545098304748535,
-      "time_to_first_chunk": 0.18648386001586914,
+      "text_length": 1140,
+      "token_count": null,
+      "total_time": 3.827953338623047,
+      "time_to_first_chunk": 0.39266157150268555,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run2_stream.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens250_run2_stream.wav",
+      "audio_length": 78.775,
+      "target_tokens": 250,
+      "actual_tokens": 250,
       "run_number": 2
     },
     {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.426060676574707,
-      "time_to_first_chunk": 0.20081472396850586,
+      "text_length": 1140,
+      "token_count": null,
+      "total_time": 3.9389824867248535,
+      "time_to_first_chunk": 0.3231511116027832,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run3_stream.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens250_run3_stream.wav",
+      "audio_length": 78.775,
+      "target_tokens": 250,
+      "actual_tokens": 250,
       "run_number": 3
     },
     {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.4084081649780273,
-      "time_to_first_chunk": 0.18551135063171387,
+      "text_length": 1140,
+      "token_count": null,
+      "total_time": 3.942399740219116,
+      "time_to_first_chunk": 0.34731340408325195,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run4_stream.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens250_run4_stream.wav",
+      "audio_length": 78.775,
+      "target_tokens": 250,
+      "actual_tokens": 250,
       "run_number": 4
     },
     {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.4703152179718018,
-      "time_to_first_chunk": 0.17750859260559082,
+      "text_length": 1140,
+      "token_count": null,
+      "total_time": 3.7748308181762695,
+      "time_to_first_chunk": 0.40787601470947266,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run5_stream.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens250_run5_stream.wav",
+      "audio_length": 78.775,
+      "target_tokens": 250,
+      "actual_tokens": 250,
       "run_number": 5
     },
     {
       "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.289574384689331,
-      "time_to_first_chunk": 0.1997976303100586,
+      "token_count": null,
+      "total_time": 9.003147840499878,
+      "time_to_first_chunk": 0.5455703735351562,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run1_stream.wav",
-      "audio_length": 157.875,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run1_stream.wav",
+      "audio_length": 156.475,
       "target_tokens": 500,
       "actual_tokens": 500,
       "run_number": 1
     },
     {
       "text_length": 2232,
-      "token_count": 500,
-      "total_time": 3.7089381217956543,
-      "time_to_first_chunk": 0.25969815254211426,
+      "token_count": null,
+      "total_time": 10.081491231918335,
+      "time_to_first_chunk": 0.4591703414916992,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run2_stream.wav",
-      "audio_length": 157.875,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run2_stream.wav",
+      "audio_length": 156.475,
       "target_tokens": 500,
       "actual_tokens": 500,
       "run_number": 2
     },
     {
       "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.138366222381592,
-      "time_to_first_chunk": 0.1831505298614502,
+      "token_count": null,
+      "total_time": 9.767668962478638,
+      "time_to_first_chunk": 0.31237053871154785,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run3_stream.wav",
-      "audio_length": 157.875,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run3_stream.wav",
+      "audio_length": 156.475,
       "target_tokens": 500,
       "actual_tokens": 500,
       "run_number": 3
     },
     {
       "text_length": 2232,
-      "token_count": 500,
-      "total_time": 3.980635643005371,
-      "time_to_first_chunk": 0.20493030548095703,
+      "token_count": null,
+      "total_time": 9.090342998504639,
+      "time_to_first_chunk": 0.41753244400024414,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run4_stream.wav",
-      "audio_length": 157.875,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run4_stream.wav",
+      "audio_length": 156.475,
       "target_tokens": 500,
       "actual_tokens": 500,
       "run_number": 4
     },
     {
       "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.1370298862457275,
-      "time_to_first_chunk": 0.19150757789611816,
+      "token_count": null,
+      "total_time": 9.876578330993652,
+      "time_to_first_chunk": 0.3965120315551758,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run5_stream.wav",
-      "audio_length": 157.875,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run5_stream.wav",
+      "audio_length": 156.475,
       "target_tokens": 500,
       "actual_tokens": 500,
       "run_number": 5
     }
   ],
   "summary": {
+    "10": {
+      "avg_time_to_first_chunk": 0.296,
+      "avg_total_time": 0.316,
+      "avg_audio_length": 3.45,
+      "num_successful_runs": 5
+    },
     "50": {
-      "avg_time_to_first_chunk": 0.233,
-      "avg_total_time": 0.527,
-      "avg_audio_length": 16.325,
+      "avg_time_to_first_chunk": 0.325,
+      "avg_total_time": 0.934,
+      "avg_audio_length": 15.925,
       "num_successful_runs": 5
     },
     "100": {
-      "avg_time_to_first_chunk": 0.196,
-      "avg_total_time": 0.739,
-      "avg_audio_length": 31.1,
+      "avg_time_to_first_chunk": 0.335,
+      "avg_total_time": 1.442,
+      "avg_audio_length": 30.5,
       "num_successful_runs": 5
     },
-    "200": {
-      "avg_time_to_first_chunk": 0.192,
-      "avg_total_time": 1.469,
-      "avg_audio_length": 62.625,
+    "250": {
+      "avg_time_to_first_chunk": 0.362,
+      "avg_total_time": 4.114,
+      "avg_audio_length": 78.775,
       "num_successful_runs": 5
     },
     "500": {
-      "avg_time_to_first_chunk": 0.208,
-      "avg_total_time": 4.051,
-      "avg_audio_length": 157.875,
+      "avg_time_to_first_chunk": 0.426,
+      "avg_total_time": 9.564,
+      "avg_audio_length": 156.475,
       "num_successful_runs": 5
     }
   },
-  "timestamp": "2025-01-04 22:16:30"
+  "timestamp": "2025-01-06 00:00:43"
 }
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream_openai.json b/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream_openai.json
index b996231..179998d 100644
--- a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream_openai.json
+++ b/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream_openai.json
@@ -1,271 +1,337 @@
 {
   "individual_runs": [
     {
-      "text_length": 212,
-      "token_count": 50,
-      "total_time": 1.149611473083496,
-      "time_to_first_chunk": 0.8767304420471191,
+      "text_length": 37,
+      "token_count": null,
+      "total_time": 0.7105245590209961,
+      "time_to_first_chunk": 0.6905441284179688,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run1_stream_openai.wav",
-      "audio_length": 16.325,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens10_run1_stream_openai.wav",
+      "audio_length": 3.45,
+      "target_tokens": 10,
+      "actual_tokens": 10,
+      "run_number": 1
+    },
+    {
+      "text_length": 37,
+      "token_count": null,
+      "total_time": 0.35063982009887695,
+      "time_to_first_chunk": 0.32647228240966797,
+      "error": null,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens10_run2_stream_openai.wav",
+      "audio_length": 3.45,
+      "target_tokens": 10,
+      "actual_tokens": 10,
+      "run_number": 2
+    },
+    {
+      "text_length": 37,
+      "token_count": null,
+      "total_time": 0.43519043922424316,
+      "time_to_first_chunk": 0.41011548042297363,
+      "error": null,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens10_run3_stream_openai.wav",
+      "audio_length": 3.45,
+      "target_tokens": 10,
+      "actual_tokens": 10,
+      "run_number": 3
+    },
+    {
+      "text_length": 37,
+      "token_count": null,
+      "total_time": 0.33886170387268066,
+      "time_to_first_chunk": 0.32068943977355957,
+      "error": null,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens10_run4_stream_openai.wav",
+      "audio_length": 3.45,
+      "target_tokens": 10,
+      "actual_tokens": 10,
+      "run_number": 4
+    },
+    {
+      "text_length": 37,
+      "token_count": null,
+      "total_time": 0.31725525856018066,
+      "time_to_first_chunk": 0.29624342918395996,
+      "error": null,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens10_run5_stream_openai.wav",
+      "audio_length": 3.45,
+      "target_tokens": 10,
+      "actual_tokens": 10,
+      "run_number": 5
+    },
+    {
+      "text_length": 212,
+      "token_count": null,
+      "total_time": 1.0215234756469727,
+      "time_to_first_chunk": 0.38323354721069336,
+      "error": null,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run1_stream_openai.wav",
+      "audio_length": 15.925,
       "target_tokens": 50,
       "actual_tokens": 50,
       "run_number": 1
     },
     {
       "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.9325947761535645,
-      "time_to_first_chunk": 0.5965914726257324,
+      "token_count": null,
+      "total_time": 1.38511061668396,
+      "time_to_first_chunk": 0.47052764892578125,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run2_stream_openai.wav",
-      "audio_length": 16.325,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run2_stream_openai.wav",
+      "audio_length": 15.925,
       "target_tokens": 50,
       "actual_tokens": 50,
       "run_number": 2
     },
     {
       "text_length": 212,
-      "token_count": 50,
-      "total_time": 0.9205234050750732,
-      "time_to_first_chunk": 0.5961906909942627,
+      "token_count": null,
+      "total_time": 1.0185234546661377,
+      "time_to_first_chunk": 0.3535764217376709,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run3_stream_openai.wav",
-      "audio_length": 16.325,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run3_stream_openai.wav",
+      "audio_length": 15.925,
       "target_tokens": 50,
       "actual_tokens": 50,
       "run_number": 3
     },
     {
       "text_length": 212,
-      "token_count": 50,
-      "total_time": 1.1321916580200195,
-      "time_to_first_chunk": 0.6946916580200195,
+      "token_count": null,
+      "total_time": 0.8875925540924072,
+      "time_to_first_chunk": 0.3373105525970459,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run4_stream_openai.wav",
-      "audio_length": 16.325,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run4_stream_openai.wav",
+      "audio_length": 15.925,
       "target_tokens": 50,
       "actual_tokens": 50,
       "run_number": 4
     },
     {
       "text_length": 212,
-      "token_count": 50,
-      "total_time": 1.1146185398101807,
-      "time_to_first_chunk": 0.6918885707855225,
+      "token_count": null,
+      "total_time": 0.9557526111602783,
+      "time_to_first_chunk": 0.3364882469177246,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run5_stream_openai.wav",
-      "audio_length": 16.325,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run5_stream_openai.wav",
+      "audio_length": 15.925,
       "target_tokens": 50,
       "actual_tokens": 50,
       "run_number": 5
     },
     {
       "text_length": 448,
-      "token_count": 100,
-      "total_time": 1.3645410537719727,
-      "time_to_first_chunk": 0.6802399158477783,
+      "token_count": null,
+      "total_time": 1.569596767425537,
+      "time_to_first_chunk": 0.42070746421813965,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run1_stream_openai.wav",
-      "audio_length": 31.1,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run1_stream_openai.wav",
+      "audio_length": 30.5,
       "target_tokens": 100,
       "actual_tokens": 100,
       "run_number": 1
     },
     {
       "text_length": 448,
-      "token_count": 100,
-      "total_time": 1.4154777526855469,
-      "time_to_first_chunk": 0.7297353744506836,
+      "token_count": null,
+      "total_time": 1.5172030925750732,
+      "time_to_first_chunk": 0.3982264995574951,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run2_stream_openai.wav",
-      "audio_length": 31.1,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run2_stream_openai.wav",
+      "audio_length": 30.5,
       "target_tokens": 100,
       "actual_tokens": 100,
       "run_number": 2
     },
     {
       "text_length": 448,
-      "token_count": 100,
-      "total_time": 1.3589520454406738,
-      "time_to_first_chunk": 0.698603630065918,
+      "token_count": null,
+      "total_time": 1.5318474769592285,
+      "time_to_first_chunk": 0.3533785343170166,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run3_stream_openai.wav",
-      "audio_length": 31.1,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run3_stream_openai.wav",
+      "audio_length": 30.5,
       "target_tokens": 100,
       "actual_tokens": 100,
       "run_number": 3
     },
     {
       "text_length": 448,
-      "token_count": 100,
-      "total_time": 1.2276430130004883,
-      "time_to_first_chunk": 0.6705801486968994,
+      "token_count": null,
+      "total_time": 1.3858752250671387,
+      "time_to_first_chunk": 0.3360786437988281,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run4_stream_openai.wav",
-      "audio_length": 31.1,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run4_stream_openai.wav",
+      "audio_length": 30.5,
       "target_tokens": 100,
       "actual_tokens": 100,
       "run_number": 4
     },
     {
       "text_length": 448,
-      "token_count": 100,
-      "total_time": 1.0949454307556152,
-      "time_to_first_chunk": 0.5698442459106445,
+      "token_count": null,
+      "total_time": 1.7841475009918213,
+      "time_to_first_chunk": 0.34446048736572266,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run5_stream_openai.wav",
-      "audio_length": 31.1,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run5_stream_openai.wav",
+      "audio_length": 30.5,
       "target_tokens": 100,
       "actual_tokens": 100,
       "run_number": 5
     },
     {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.8211240768432617,
-      "time_to_first_chunk": 0.6070489883422852,
+      "text_length": 1140,
+      "token_count": null,
+      "total_time": 4.334965467453003,
+      "time_to_first_chunk": 0.4336512088775635,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run1_stream_openai.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens250_run1_stream_openai.wav",
+      "audio_length": 78.775,
+      "target_tokens": 250,
+      "actual_tokens": 250,
       "run_number": 1
     },
     {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.8376774787902832,
-      "time_to_first_chunk": 0.6538689136505127,
+      "text_length": 1140,
+      "token_count": null,
+      "total_time": 5.265941858291626,
+      "time_to_first_chunk": 0.5461773872375488,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run2_stream_openai.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens250_run2_stream_openai.wav",
+      "audio_length": 78.775,
+      "target_tokens": 250,
+      "actual_tokens": 250,
       "run_number": 2
     },
     {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.6953792572021484,
-      "time_to_first_chunk": 0.5554308891296387,
+      "text_length": 1140,
+      "token_count": null,
+      "total_time": 5.66066575050354,
+      "time_to_first_chunk": 0.4757547378540039,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run3_stream_openai.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens250_run3_stream_openai.wav",
+      "audio_length": 78.775,
+      "target_tokens": 250,
+      "actual_tokens": 250,
       "run_number": 3
     },
     {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.887030839920044,
-      "time_to_first_chunk": 0.5866930484771729,
+      "text_length": 1140,
+      "token_count": null,
+      "total_time": 9.289174318313599,
+      "time_to_first_chunk": 0.40159058570861816,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run4_stream_openai.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens250_run4_stream_openai.wav",
+      "audio_length": 78.775,
+      "target_tokens": 250,
+      "actual_tokens": 250,
       "run_number": 4
     },
     {
-      "text_length": 906,
-      "token_count": 200,
-      "total_time": 1.7908406257629395,
-      "time_to_first_chunk": 0.5897490978240967,
+      "text_length": 1140,
+      "token_count": null,
+      "total_time": 4.425869703292847,
+      "time_to_first_chunk": 0.40808558464050293,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run5_stream_openai.wav",
-      "audio_length": 62.625,
-      "target_tokens": 200,
-      "actual_tokens": 200,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens250_run5_stream_openai.wav",
+      "audio_length": 78.775,
+      "target_tokens": 250,
+      "actual_tokens": 250,
       "run_number": 5
     },
     {
       "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.228837013244629,
-      "time_to_first_chunk": 0.5315976142883301,
+      "token_count": null,
+      "total_time": 9.600461483001709,
+      "time_to_first_chunk": 0.3966805934906006,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run1_stream_openai.wav",
-      "audio_length": 157.875,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run1_stream_openai.wav",
+      "audio_length": 156.475,
       "target_tokens": 500,
       "actual_tokens": 500,
       "run_number": 1
     },
     {
       "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.489210367202759,
-      "time_to_first_chunk": 0.5261838436126709,
+      "token_count": null,
+      "total_time": 8.82239580154419,
+      "time_to_first_chunk": 0.3900904655456543,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run2_stream_openai.wav",
-      "audio_length": 157.875,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run2_stream_openai.wav",
+      "audio_length": 156.475,
       "target_tokens": 500,
       "actual_tokens": 500,
       "run_number": 2
     },
     {
       "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.5290446281433105,
-      "time_to_first_chunk": 0.6186764240264893,
+      "token_count": null,
+      "total_time": 10.99152159690857,
+      "time_to_first_chunk": 0.4041757583618164,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run3_stream_openai.wav",
-      "audio_length": 157.875,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run3_stream_openai.wav",
+      "audio_length": 156.475,
       "target_tokens": 500,
       "actual_tokens": 500,
       "run_number": 3
     },
     {
       "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.209261178970337,
-      "time_to_first_chunk": 0.5990591049194336,
+      "token_count": null,
+      "total_time": 9.12995958328247,
+      "time_to_first_chunk": 0.43430614471435547,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run4_stream_openai.wav",
-      "audio_length": 157.875,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run4_stream_openai.wav",
+      "audio_length": 156.475,
       "target_tokens": 500,
       "actual_tokens": 500,
       "run_number": 4
     },
     {
       "text_length": 2232,
-      "token_count": 500,
-      "total_time": 4.218762636184692,
-      "time_to_first_chunk": 0.5466251373291016,
+      "token_count": null,
+      "total_time": 10.043727159500122,
+      "time_to_first_chunk": 0.41181445121765137,
       "error": null,
-      "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run5_stream_openai.wav",
-      "audio_length": 157.875,
+      "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run5_stream_openai.wav",
+      "audio_length": 156.475,
       "target_tokens": 500,
       "actual_tokens": 500,
       "run_number": 5
     }
   ],
   "summary": {
+    "10": {
+      "avg_time_to_first_chunk": 0.409,
+      "avg_total_time": 0.43,
+      "avg_audio_length": 3.45,
+      "num_successful_runs": 5
+    },
     "50": {
-      "avg_time_to_first_chunk": 0.691,
-      "avg_total_time": 1.05,
-      "avg_audio_length": 16.325,
+      "avg_time_to_first_chunk": 0.376,
+      "avg_total_time": 1.054,
+      "avg_audio_length": 15.925,
       "num_successful_runs": 5
     },
     "100": {
-      "avg_time_to_first_chunk": 0.67,
-      "avg_total_time": 1.292,
-      "avg_audio_length": 31.1,
+      "avg_time_to_first_chunk": 0.371,
+      "avg_total_time": 1.558,
+      "avg_audio_length": 30.5,
       "num_successful_runs": 5
     },
-    "200": {
-      "avg_time_to_first_chunk": 0.599,
-      "avg_total_time": 1.806,
-      "avg_audio_length": 62.625,
+    "250": {
+      "avg_time_to_first_chunk": 0.453,
+      "avg_total_time": 5.795,
+      "avg_audio_length": 78.775,
       "num_successful_runs": 5
     },
     "500": {
-      "avg_time_to_first_chunk": 0.564,
-      "avg_total_time": 4.335,
-      "avg_audio_length": 157.875,
+      "avg_time_to_first_chunk": 0.407,
+      "avg_total_time": 9.718,
+      "avg_audio_length": 156.475,
       "num_successful_runs": 5
     }
   },
-  "timestamp": "2025-01-04 22:18:03"
+  "timestamp": "2025-01-06 00:02:21"
 }
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json
index ccac37e..5a44ee5 100644
--- a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json
+++ b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json
@@ -2,1252 +2,592 @@
   "results": [
     {
       "tokens": 150,
-      "processing_time": 1.86,
-      "output_length": 45.9,
-      "rtf": 0.04,
-      "elapsed_time": 1.92
+      "processing_time": 2.8,
+      "output_length": 49.7,
+      "rtf": 0.06,
+      "elapsed_time": 2.88591
     },
     {
       "tokens": 300,
-      "processing_time": 3.08,
-      "output_length": 96.425,
-      "rtf": 0.03,
-      "elapsed_time": 5.06
+      "processing_time": 6.33,
+      "output_length": 100.225,
+      "rtf": 0.06,
+      "elapsed_time": 9.3164
     },
     {
       "tokens": 450,
-      "processing_time": 4.4,
-      "output_length": 143.1,
-      "rtf": 0.03,
-      "elapsed_time": 9.53
+      "processing_time": 11.23,
+      "output_length": 146.9,
+      "rtf": 0.08,
+      "elapsed_time": 20.69256
     },
     {
       "tokens": 600,
-      "processing_time": 6.47,
-      "output_length": 188.675,
-      "rtf": 0.03,
-      "elapsed_time": 16.06
+      "processing_time": 14.11,
+      "output_length": 198.7,
+      "rtf": 0.07,
+      "elapsed_time": 34.94436
     },
     {
       "tokens": 750,
-      "processing_time": 8.32,
-      "output_length": 236.7,
-      "rtf": 0.04,
-      "elapsed_time": 24.45
+      "processing_time": 14.97,
+      "output_length": 255.05,
+      "rtf": 0.06,
+      "elapsed_time": 50.10276
     },
     {
       "tokens": 900,
-      "processing_time": 8.92,
-      "output_length": 283.425,
-      "rtf": 0.03,
-      "elapsed_time": 33.45
-    },
-    {
-      "tokens": 2000,
-      "processing_time": 18.55,
-      "output_length": 624.325,
-      "rtf": 0.03,
-      "elapsed_time": 52.14
-    },
-    {
-      "tokens": 3000,
-      "processing_time": 23.98,
-      "output_length": 931.15,
-      "rtf": 0.03,
-      "elapsed_time": 76.32
-    },
-    {
-      "tokens": 4000,
-      "processing_time": 32.93,
-      "output_length": 1222.1,
-      "rtf": 0.03,
-      "elapsed_time": 109.53
-    },
-    {
-      "tokens": 5000,
-      "processing_time": 45.39,
-      "output_length": 1524.575,
-      "rtf": 0.03,
-      "elapsed_time": 155.23
+      "processing_time": 19.96,
+      "output_length": 305.45,
+      "rtf": 0.07,
+      "elapsed_time": 70.19825
     }
   ],
   "system_metrics": [
     {
-      "timestamp": "2025-01-04T02:37:52.172368",
-      "cpu_percent": 11.51,
-      "ram_percent": 52.8,
-      "ram_used_gb": 33.61172866821289,
-      "gpu_memory_used": 3216.0,
-      "relative_time": 0.08031892776489258
+      "timestamp": "2025-01-05T23:58:01.325720",
+      "cpu_percent": 80.02,
+      "ram_percent": 72.3,
+      "ram_used_gb": 45.9870491027832,
+      "gpu_memory_used": 6898.0,
+      "relative_time": 0.11059904098510742
     },
-    {
-      "timestamp": "2025-01-04T02:37:53.266071",
-      "cpu_percent": 15.33,
-      "ram_percent": 52.9,
-      "ram_used_gb": 33.678314208984375,
-      "gpu_memory_used": 3392.0,
-      "relative_time": 1.1673684120178223
+    {
+      "timestamp": "2025-01-05T23:58:02.435394",
+      "cpu_percent": 35.49,
+      "ram_percent": 72.5,
+      "ram_used_gb": 46.1334114074707,
+      "gpu_memory_used": 6900.0,
+      "relative_time": 1.1894314289093018
     },
-    {
-      "timestamp": "2025-01-04T02:37:54.352909",
-      "cpu_percent": 15.3,
-      "ram_percent": 53.0,
-      "ram_used_gb": 33.712764739990234,
-      "gpu_memory_used": 3667.0,
-      "relative_time": 2.253591537475586
+    {
+      "timestamp": "2025-01-05T23:58:03.515145",
+      "cpu_percent": 24.51,
+      "ram_percent": 72.8,
+      "ram_used_gb": 46.30204772949219,
+      "gpu_memory_used": 6898.0,
+      "relative_time": 2.2583394050598145
     },
-    {
-      "timestamp": "2025-01-04T02:37:55.439413",
-      "cpu_percent": 23.51,
-      "ram_percent": 52.7,
-      "ram_used_gb": 33.49789810180664,
-      "gpu_memory_used": 3662.0,
-      "relative_time": 3.3292760848999023
+    {
+      "timestamp": "2025-01-05T23:58:04.583438",
+      "cpu_percent": 21.46,
+      "ram_percent": 72.9,
+      "ram_used_gb": 46.4000358581543,
+      "gpu_memory_used": 6898.0,
+      "relative_time": 3.3375651836395264
     },
-    {
-      "timestamp": "2025-01-04T02:37:56.511211",
-      "cpu_percent": 14.69,
-      "ram_percent": 52.7,
-      "ram_used_gb": 33.494102478027344,
-      "gpu_memory_used": 3668.0,
-      "relative_time": 4.397106885910034
+    {
+      "timestamp": "2025-01-05T23:58:05.662045",
+      "cpu_percent": 21.96,
+      "ram_percent": 73.0,
+      "ram_used_gb": 46.42799758911133,
+      "gpu_memory_used": 6898.0,
+      "relative_time": 4.432476043701172
     },
-    {
-      "timestamp": "2025-01-04T02:37:57.582176",
-      "cpu_percent": 11.01,
-      "ram_percent": 52.8,
-      "ram_used_gb": 33.564491271972656,
-      "gpu_memory_used": 3665.0,
-      "relative_time": 5.46670126914978
+    {
+      "timestamp": "2025-01-05T23:58:06.757941",
+      "cpu_percent": 22.5,
+      "ram_percent": 73.0,
+      "ram_used_gb": 46.43841552734375,
+      "gpu_memory_used": 6897.0,
+      "relative_time": 5.494191408157349
     },
-    {
-      "timestamp": "2025-01-04T02:37:58.637969",
-      "cpu_percent": 15.04,
-      "ram_percent": 52.8,
-      "ram_used_gb": 33.555362701416016,
-      "gpu_memory_used": 3668.0,
-      "relative_time": 6.523184061050415
+    {
+      "timestamp": "2025-01-05T23:58:07.820104",
+      "cpu_percent": 29.33,
+      "ram_percent": 73.2,
+      "ram_used_gb": 46.577056884765625,
+      "gpu_memory_used": 6898.0,
+      "relative_time": 6.57433009147644
     },
-    {
-      "timestamp": "2025-01-04T02:37:59.700880",
-      "cpu_percent": 13.32,
-      "ram_percent": 52.8,
-      "ram_used_gb": 33.559967041015625,
-      "gpu_memory_used": 3668.0,
-      "relative_time": 7.589032888412476
+    {
+      "timestamp": "2025-01-05T23:58:08.899575",
+      "cpu_percent": 44.12,
+      "ram_percent": 73.2,
+      "ram_used_gb": 46.559593200683594,
+      "gpu_memory_used": 6898.0,
+      "relative_time": 7.6514153480529785
     },
-    {
-      "timestamp": "2025-01-04T02:38:00.773895",
-      "cpu_percent": 12.45,
-      "ram_percent": 52.8,
-      "ram_used_gb": 33.609134674072266,
-      "gpu_memory_used": 3667.0,
-      "relative_time": 8.677486181259155
+    {
+      "timestamp": "2025-01-05T23:58:09.977111",
+      "cpu_percent": 52.3,
+      "ram_percent": 73.1,
+      "ram_used_gb": 46.498756408691406,
+      "gpu_memory_used": 6898.0,
+      "relative_time": 8.74302339553833
     },
-    {
-      "timestamp": "2025-01-04T02:38:01.851195",
-      "cpu_percent": 12.62,
-      "ram_percent": 52.9,
-      "ram_used_gb": 33.67635726928711,
-      "gpu_memory_used": 3665.0,
-      "relative_time": 9.734971046447754
+    {
+      "timestamp": "2025-01-05T23:58:11.067817",
+      "cpu_percent": 32.66,
+      "ram_percent": 73.2,
+      "ram_used_gb": 46.585052490234375,
+      "gpu_memory_used": 6889.0,
+      "relative_time": 9.832671403884888
     },
-    {
-      "timestamp": "2025-01-04T02:38:02.907897",
-      "cpu_percent": 20.61,
-      "ram_percent": 53.0,
-      "ram_used_gb": 33.72555160522461,
-      "gpu_memory_used": 3660.0,
-      "relative_time": 10.813292026519775
+    {
+      "timestamp": "2025-01-05T23:58:12.158255",
+      "cpu_percent": 28.38,
+      "ram_percent": 73.7,
+      "ram_used_gb": 46.87907791137695,
+      "gpu_memory_used": 6889.0,
+      "relative_time": 10.914559125900269
     },
-    {
-      "timestamp": "2025-01-04T02:38:03.996322",
-      "cpu_percent": 33.24,
-      "ram_percent": 53.2,
-      "ram_used_gb": 33.832088470458984,
-      "gpu_memory_used": 3660.0,
-      "relative_time": 11.917856454849243
+    {
+      "timestamp": "2025-01-05T23:58:13.239163",
+      "cpu_percent": 31.73,
+      "ram_percent": 73.8,
+      "ram_used_gb": 46.930152893066406,
+      "gpu_memory_used": 6893.0,
+      "relative_time": 11.998127222061157
     },
-    {
-      "timestamp": "2025-01-04T02:38:05.101973",
-      "cpu_percent": 14.24,
-      "ram_percent": 53.0,
-      "ram_used_gb": 33.7408447265625,
-      "gpu_memory_used": 3662.0,
-      "relative_time": 12.986546277999878
+    {
+      "timestamp": "2025-01-05T23:58:14.323142",
+      "cpu_percent": 31.32,
+      "ram_percent": 74.4,
+      "ram_used_gb": 47.331382751464844,
+      "gpu_memory_used": 6897.0,
+      "relative_time": 13.080781936645508
     },
     {
-      "timestamp": "2025-01-04T02:38:06.162037",
-      "cpu_percent": 14.38,
-      "ram_percent": 53.1,
-      "ram_used_gb": 33.774169921875,
-      "gpu_memory_used": 3662.0,
-      "relative_time": 14.062608242034912
+      "timestamp": "2025-01-05T23:58:15.407719",
+      "cpu_percent": 26.34,
+      "ram_percent": 74.4,
+      "ram_used_gb": 47.34866714477539,
+      "gpu_memory_used": 6897.0,
+      "relative_time": 14.168652534484863
     },
     {
-      "timestamp": "2025-01-04T02:38:07.248210",
-      "cpu_percent": 14.39,
-      "ram_percent": 53.2,
-      "ram_used_gb": 33.83738327026367,
-      "gpu_memory_used": 4029.0,
-      "relative_time": 15.156044960021973
+      "timestamp": "2025-01-05T23:58:16.493716",
+      "cpu_percent": 39.87,
+      "ram_percent": 74.4,
+      "ram_used_gb": 47.35265350341797,
+      "gpu_memory_used": 6897.0,
+      "relative_time": 15.266503810882568
     },
     {
-      "timestamp": "2025-01-04T02:38:08.329582",
-      "cpu_percent": 31.18,
-      "ram_percent": 53.2,
-      "ram_used_gb": 33.87126541137695,
-      "gpu_memory_used": 4032.0,
-      "relative_time": 16.249940395355225
+      "timestamp": "2025-01-05T23:58:17.592593",
+      "cpu_percent": 39.32,
+      "ram_percent": 74.5,
+      "ram_used_gb": 47.37355041503906,
+      "gpu_memory_used": 6898.0,
+      "relative_time": 16.375996112823486
     },
     {
-      "timestamp": "2025-01-04T02:38:09.432992",
-      "cpu_percent": 19.33,
-      "ram_percent": 53.2,
-      "ram_used_gb": 33.842403411865234,
-      "gpu_memory_used": 4032.0,
-      "relative_time": 17.331223011016846
+      "timestamp": "2025-01-05T23:58:18.701607",
+      "cpu_percent": 33.48,
+      "ram_percent": 74.4,
+      "ram_used_gb": 47.338523864746094,
+      "gpu_memory_used": 6894.0,
+      "relative_time": 17.455860376358032
     },
     {
-      "timestamp": "2025-01-04T02:38:10.505101",
-      "cpu_percent": 13.34,
-      "ram_percent": 53.2,
-      "ram_used_gb": 33.86738967895508,
-      "gpu_memory_used": 4029.0,
-      "relative_time": 18.390397548675537
+      "timestamp": "2025-01-05T23:58:19.781209",
+      "cpu_percent": 24.39,
+      "ram_percent": 75.7,
+      "ram_used_gb": 48.1730842590332,
+      "gpu_memory_used": 6894.0,
+      "relative_time": 18.53490376472473
     },
     {
-      "timestamp": "2025-01-04T02:38:11.570033",
-      "cpu_percent": 12.61,
-      "ram_percent": 53.4,
-      "ram_used_gb": 33.938289642333984,
-      "gpu_memory_used": 4028.0,
-      "relative_time": 19.477521181106567
+      "timestamp": "2025-01-05T23:58:20.860173",
+      "cpu_percent": 27.27,
+      "ram_percent": 75.6,
+      "ram_used_gb": 48.08787536621094,
+      "gpu_memory_used": 6894.0,
+      "relative_time": 19.615966081619263
     },
     {
-      "timestamp": "2025-01-04T02:38:12.663780",
-      "cpu_percent": 15.78,
-      "ram_percent": 53.4,
-      "ram_used_gb": 33.969398498535156,
-      "gpu_memory_used": 4030.0,
-      "relative_time": 20.57425808906555
+      "timestamp": "2025-01-05T23:58:21.942004",
+      "cpu_percent": 26.82,
+      "ram_percent": 75.8,
+      "ram_used_gb": 48.20832443237305,
+      "gpu_memory_used": 6892.0,
+      "relative_time": 20.724673748016357
     },
     {
-      "timestamp": "2025-01-04T02:38:13.750065",
-      "cpu_percent": 18.69,
-      "ram_percent": 53.5,
-      "ram_used_gb": 34.03954315185547,
-      "gpu_memory_used": 4021.0,
-      "relative_time": 21.652076244354248
+      "timestamp": "2025-01-05T23:58:23.050088",
+      "cpu_percent": 46.01,
+      "ram_percent": 75.8,
+      "ram_used_gb": 48.23963928222656,
+      "gpu_memory_used": 6887.0,
+      "relative_time": 21.814561367034912
     },
     {
-      "timestamp": "2025-01-04T02:38:14.825318",
-      "cpu_percent": 10.48,
-      "ram_percent": 53.6,
-      "ram_used_gb": 34.07048416137695,
-      "gpu_memory_used": 4025.0,
-      "relative_time": 22.73010230064392
+      "timestamp": "2025-01-05T23:58:24.139949",
+      "cpu_percent": 27.91,
+      "ram_percent": 75.9,
+      "ram_used_gb": 48.260440826416016,
+      "gpu_memory_used": 6894.0,
+      "relative_time": 22.900237798690796
     },
     {
-      "timestamp": "2025-01-04T02:38:15.912340",
-      "cpu_percent": 12.53,
-      "ram_percent": 53.6,
-      "ram_used_gb": 34.09389877319336,
-      "gpu_memory_used": 4026.0,
-      "relative_time": 23.81609869003296
+      "timestamp": "2025-01-05T23:58:25.226167",
+      "cpu_percent": 20.4,
+      "ram_percent": 76.4,
+      "ram_used_gb": 48.5912971496582,
+      "gpu_memory_used": 6894.0,
+      "relative_time": 24.029305934906006
     },
     {
-      "timestamp": "2025-01-04T02:38:17.003329",
-      "cpu_percent": 16.09,
-      "ram_percent": 53.7,
-      "ram_used_gb": 34.1781120300293,
-      "gpu_memory_used": 4025.0,
-      "relative_time": 24.90904140472412
+      "timestamp": "2025-01-05T23:58:26.354518",
+      "cpu_percent": 26.17,
+      "ram_percent": 76.5,
+      "ram_used_gb": 48.63065719604492,
+      "gpu_memory_used": 6896.0,
+      "relative_time": 25.113131046295166
     },
     {
-      "timestamp": "2025-01-04T02:38:18.079837",
-      "cpu_percent": 14.98,
-      "ram_percent": 53.8,
-      "ram_used_gb": 34.21260070800781,
-      "gpu_memory_used": 4025.0,
-      "relative_time": 25.986279249191284
+      "timestamp": "2025-01-05T23:58:27.438557",
+      "cpu_percent": 19.84,
+      "ram_percent": 76.5,
+      "ram_used_gb": 48.630950927734375,
+      "gpu_memory_used": 6896.0,
+      "relative_time": 26.198577404022217
     },
     {
-      "timestamp": "2025-01-04T02:38:19.167635",
-      "cpu_percent": 14.85,
-      "ram_percent": 53.8,
-      "ram_used_gb": 34.23923873901367,
-      "gpu_memory_used": 4024.0,
-      "relative_time": 27.076823234558105
+      "timestamp": "2025-01-05T23:58:28.523869",
+      "cpu_percent": 25.87,
+      "ram_percent": 76.4,
+      "ram_used_gb": 48.61442565917969,
+      "gpu_memory_used": 6896.0,
+      "relative_time": 27.290891647338867
     },
     {
-      "timestamp": "2025-01-04T02:38:20.258141",
-      "cpu_percent": 15.05,
-      "ram_percent": 53.9,
-      "ram_used_gb": 34.26483917236328,
-      "gpu_memory_used": 4015.0,
-      "relative_time": 28.144607067108154
+      "timestamp": "2025-01-05T23:58:29.616289",
+      "cpu_percent": 26.29,
+      "ram_percent": 78.0,
+      "ram_used_gb": 49.63731384277344,
+      "gpu_memory_used": 6896.0,
+      "relative_time": 28.37503457069397
     },
     {
-      "timestamp": "2025-01-04T02:38:21.315694",
-      "cpu_percent": 17.08,
-      "ram_percent": 53.9,
-      "ram_used_gb": 34.31473922729492,
-      "gpu_memory_used": 4016.0,
-      "relative_time": 29.20189356803894
+      "timestamp": "2025-01-05T23:58:30.702453",
+      "cpu_percent": 31.57,
+      "ram_percent": 79.0,
+      "ram_used_gb": 50.24030303955078,
+      "gpu_memory_used": 6896.0,
+      "relative_time": 29.482722520828247
     },
     {
-      "timestamp": "2025-01-04T02:38:22.388259",
-      "cpu_percent": 17.47,
-      "ram_percent": 54.0,
-      "ram_used_gb": 34.35490798950195,
-      "gpu_memory_used": 4016.0,
-      "relative_time": 30.28918957710266
+      "timestamp": "2025-01-05T23:58:31.807837",
+      "cpu_percent": 43.3,
+      "ram_percent": 78.9,
+      "ram_used_gb": 50.18223190307617,
+      "gpu_memory_used": 6897.0,
+      "relative_time": 30.574514150619507
     },
     {
-      "timestamp": "2025-01-04T02:38:23.463469",
-      "cpu_percent": 15.76,
-      "ram_percent": 54.0,
-      "ram_used_gb": 34.33717346191406,
-      "gpu_memory_used": 4002.0,
-      "relative_time": 31.364880561828613
+      "timestamp": "2025-01-05T23:58:32.900169",
+      "cpu_percent": 31.68,
+      "ram_percent": 78.7,
+      "ram_used_gb": 50.09246063232422,
+      "gpu_memory_used": 6898.0,
+      "relative_time": 31.645864486694336
     },
     {
-      "timestamp": "2025-01-04T02:38:24.540334",
-      "cpu_percent": 13.54,
-      "ram_percent": 54.1,
-      "ram_used_gb": 34.38197708129883,
-      "gpu_memory_used": 3999.0,
-      "relative_time": 32.4253191947937
+      "timestamp": "2025-01-05T23:58:33.970378",
+      "cpu_percent": 33.76,
+      "ram_percent": 78.7,
+      "ram_used_gb": 50.055450439453125,
+      "gpu_memory_used": 6899.0,
+      "relative_time": 32.75911498069763
     },
     {
-      "timestamp": "2025-01-04T02:38:25.597934",
-      "cpu_percent": 13.99,
-      "ram_percent": 54.2,
-      "ram_used_gb": 34.48365783691406,
-      "gpu_memory_used": 4004.0,
-      "relative_time": 33.50029754638672
+      "timestamp": "2025-01-05T23:58:35.085855",
+      "cpu_percent": 30.84,
+      "ram_percent": 78.6,
+      "ram_used_gb": 49.99536895751953,
+      "gpu_memory_used": 6900.0,
+      "relative_time": 33.86884117126465
     },
     {
-      "timestamp": "2025-01-04T02:38:26.673108",
-      "cpu_percent": 15.16,
-      "ram_percent": 54.2,
-      "ram_used_gb": 34.50083923339844,
-      "gpu_memory_used": 4011.0,
-      "relative_time": 34.5756139755249
+      "timestamp": "2025-01-05T23:58:36.197184",
+      "cpu_percent": 72.82,
+      "ram_percent": 78.7,
+      "ram_used_gb": 50.043052673339844,
+      "gpu_memory_used": 6905.0,
+      "relative_time": 35.01772928237915
     },
     {
-      "timestamp": "2025-01-04T02:38:27.748147",
-      "cpu_percent": 17.68,
-      "ram_percent": 54.2,
-      "ram_used_gb": 34.49884033203125,
-      "gpu_memory_used": 4016.0,
-      "relative_time": 35.650988817214966
+      "timestamp": "2025-01-05T23:58:37.343692",
+      "cpu_percent": 88.62,
+      "ram_percent": 78.6,
+      "ram_used_gb": 50.025630950927734,
+      "gpu_memory_used": 6905.0,
+      "relative_time": 36.140772104263306
     },
     {
-      "timestamp": "2025-01-04T02:38:28.835603",
-      "cpu_percent": 26.81,
-      "ram_percent": 54.3,
-      "ram_used_gb": 34.536773681640625,
-      "gpu_memory_used": 4015.0,
-      "relative_time": 36.73981595039368
+      "timestamp": "2025-01-05T23:58:38.465432",
+      "cpu_percent": 83.04,
+      "ram_percent": 78.6,
+      "ram_used_gb": 49.980735778808594,
+      "gpu_memory_used": 6905.0,
+      "relative_time": 37.219876527786255
     },
     {
-      "timestamp": "2025-01-04T02:38:29.912604",
-      "cpu_percent": 27.61,
-      "ram_percent": 54.3,
-      "ram_used_gb": 34.56916427612305,
-      "gpu_memory_used": 4016.0,
-      "relative_time": 37.81279993057251
+      "timestamp": "2025-01-05T23:58:39.545718",
+      "cpu_percent": 27.47,
+      "ram_percent": 77.6,
+      "ram_used_gb": 49.38398742675781,
+      "gpu_memory_used": 6905.0,
+      "relative_time": 38.304253339767456
     },
     {
-      "timestamp": "2025-01-04T02:38:30.984988",
-      "cpu_percent": 34.24,
-      "ram_percent": 54.4,
-      "ram_used_gb": 34.599365234375,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 38.89973425865173
+      "timestamp": "2025-01-05T23:58:40.629875",
+      "cpu_percent": 27.94,
+      "ram_percent": 77.5,
+      "ram_used_gb": 49.291404724121094,
+      "gpu_memory_used": 6903.0,
+      "relative_time": 39.388391971588135
     },
     {
-      "timestamp": "2025-01-04T02:38:32.071596",
-      "cpu_percent": 31.95,
-      "ram_percent": 54.2,
-      "ram_used_gb": 34.46506881713867,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 39.95902729034424
+      "timestamp": "2025-01-05T23:58:41.713515",
+      "cpu_percent": 40.52,
+      "ram_percent": 77.5,
+      "ram_used_gb": 49.300716400146484,
+      "gpu_memory_used": 6901.0,
+      "relative_time": 40.46565055847168
     },
     {
-      "timestamp": "2025-01-04T02:38:33.140836",
-      "cpu_percent": 27.78,
-      "ram_percent": 54.3,
-      "ram_used_gb": 34.51242446899414,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 41.0454580783844
+      "timestamp": "2025-01-05T23:58:42.792104",
+      "cpu_percent": 33.71,
+      "ram_percent": 77.5,
+      "ram_used_gb": 49.276004791259766,
+      "gpu_memory_used": 6906.0,
+      "relative_time": 41.568103313446045
     },
     {
-      "timestamp": "2025-01-04T02:38:34.229919",
-      "cpu_percent": 21.09,
-      "ram_percent": 54.3,
-      "ram_used_gb": 34.513973236083984,
-      "gpu_memory_used": 4011.0,
-      "relative_time": 42.133435010910034
+      "timestamp": "2025-01-05T23:58:43.893473",
+      "cpu_percent": 41.05,
+      "ram_percent": 77.5,
+      "ram_used_gb": 49.27006912231445,
+      "gpu_memory_used": 6906.0,
+      "relative_time": 42.63854742050171
     },
     {
-      "timestamp": "2025-01-04T02:38:35.317486",
-      "cpu_percent": 17.26,
-      "ram_percent": 53.9,
-      "ram_used_gb": 34.3167839050293,
-      "gpu_memory_used": 4020.0,
-      "relative_time": 43.21739077568054
+      "timestamp": "2025-01-05T23:58:44.963495",
+      "cpu_percent": 36.05,
+      "ram_percent": 77.5,
+      "ram_used_gb": 49.29539108276367,
+      "gpu_memory_used": 6907.0,
+      "relative_time": 43.74750852584839
     },
     {
-      "timestamp": "2025-01-04T02:38:36.394375",
-      "cpu_percent": 12.32,
-      "ram_percent": 54.0,
-      "ram_used_gb": 34.34043884277344,
-      "gpu_memory_used": 4020.0,
-      "relative_time": 44.27889919281006
+      "timestamp": "2025-01-05T23:58:46.072484",
+      "cpu_percent": 33.63,
+      "ram_percent": 77.5,
+      "ram_used_gb": 49.28715896606445,
+      "gpu_memory_used": 6907.0,
+      "relative_time": 44.81742191314697
     },
     {
-      "timestamp": "2025-01-04T02:38:37.454005",
-      "cpu_percent": 12.46,
-      "ram_percent": 54.0,
-      "ram_used_gb": 34.37453842163086,
-      "gpu_memory_used": 4020.0,
-      "relative_time": 45.341508626937866
+      "timestamp": "2025-01-05T23:58:47.143039",
+      "cpu_percent": 38.61,
+      "ram_percent": 77.5,
+      "ram_used_gb": 49.28096008300781,
+      "gpu_memory_used": 6906.0,
+      "relative_time": 45.88616967201233
     },
     {
-      "timestamp": "2025-01-04T02:38:38.515337",
-      "cpu_percent": 14.16,
-      "ram_percent": 54.1,
-      "ram_used_gb": 34.401729583740234,
-      "gpu_memory_used": 4019.0,
-      "relative_time": 46.410696506500244
+      "timestamp": "2025-01-05T23:58:48.211669",
+      "cpu_percent": 34.67,
+      "ram_percent": 77.5,
+      "ram_used_gb": 49.27309799194336,
+      "gpu_memory_used": 6906.0,
+      "relative_time": 46.96554446220398
     },
     {
-      "timestamp": "2025-01-04T02:38:39.593044",
-      "cpu_percent": 13.71,
-      "ram_percent": 54.1,
-      "ram_used_gb": 34.435630798339844,
-      "gpu_memory_used": 4019.0,
-      "relative_time": 47.48556661605835
+      "timestamp": "2025-01-05T23:58:49.290049",
+      "cpu_percent": 41.6,
+      "ram_percent": 77.4,
+      "ram_used_gb": 49.246002197265625,
+      "gpu_memory_used": 6906.0,
+      "relative_time": 48.04342079162598
     },
     {
-      "timestamp": "2025-01-04T02:38:40.665509",
-      "cpu_percent": 13.17,
-      "ram_percent": 54.2,
-      "ram_used_gb": 34.49795150756836,
-      "gpu_memory_used": 4016.0,
-      "relative_time": 48.551952838897705
+      "timestamp": "2025-01-05T23:58:50.368506",
+      "cpu_percent": 36.04,
+      "ram_percent": 77.4,
+      "ram_used_gb": 49.25265121459961,
+      "gpu_memory_used": 6906.0,
+      "relative_time": 49.106462717056274
     },
     {
-      "timestamp": "2025-01-04T02:38:41.724929",
-      "cpu_percent": 12.67,
-      "ram_percent": 54.3,
-      "ram_used_gb": 34.52568054199219,
-      "gpu_memory_used": 4011.0,
-      "relative_time": 49.61591196060181
+      "timestamp": "2025-01-05T23:58:51.431363",
+      "cpu_percent": 35.67,
+      "ram_percent": 77.6,
+      "ram_used_gb": 49.333187103271484,
+      "gpu_memory_used": 6901.0,
+      "relative_time": 50.20661163330078
     },
     {
-      "timestamp": "2025-01-04T02:38:42.801080",
-      "cpu_percent": 12.83,
-      "ram_percent": 54.4,
-      "ram_used_gb": 34.579071044921875,
-      "gpu_memory_used": 4007.0,
-      "relative_time": 50.70357823371887
+      "timestamp": "2025-01-05T23:58:52.531456",
+      "cpu_percent": 44.21,
+      "ram_percent": 77.5,
+      "ram_used_gb": 49.31691360473633,
+      "gpu_memory_used": 6902.0,
+      "relative_time": 51.29085111618042
     },
     {
-      "timestamp": "2025-01-04T02:38:43.884984",
-      "cpu_percent": 12.31,
-      "ram_percent": 54.4,
-      "ram_used_gb": 34.59829330444336,
-      "gpu_memory_used": 4003.0,
-      "relative_time": 51.771891832351685
+      "timestamp": "2025-01-05T23:58:53.616613",
+      "cpu_percent": 31.69,
+      "ram_percent": 77.5,
+      "ram_used_gb": 49.31277847290039,
+      "gpu_memory_used": 6902.0,
+      "relative_time": 52.36105275154114
     },
     {
-      "timestamp": "2025-01-04T02:38:44.957477",
-      "cpu_percent": 12.58,
-      "ram_percent": 54.7,
-      "ram_used_gb": 34.76633071899414,
-      "gpu_memory_used": 4003.0,
-      "relative_time": 52.859192848205566
+      "timestamp": "2025-01-05T23:58:54.686388",
+      "cpu_percent": 28.1,
+      "ram_percent": 77.6,
+      "ram_used_gb": 49.33551025390625,
+      "gpu_memory_used": 6902.0,
+      "relative_time": 53.44114351272583
     },
     {
-      "timestamp": "2025-01-04T02:38:46.031581",
-      "cpu_percent": 14.48,
-      "ram_percent": 54.6,
-      "ram_used_gb": 34.76308059692383,
-      "gpu_memory_used": 4013.0,
-      "relative_time": 53.91648840904236
+      "timestamp": "2025-01-05T23:58:55.766386",
+      "cpu_percent": 47.14,
+      "ram_percent": 77.6,
+      "ram_used_gb": 49.34830856323242,
+      "gpu_memory_used": 6902.0,
+      "relative_time": 54.51214838027954
     },
     {
-      "timestamp": "2025-01-04T02:38:47.091693",
-      "cpu_percent": 14.35,
-      "ram_percent": 54.7,
-      "ram_used_gb": 34.81193923950195,
-      "gpu_memory_used": 4013.0,
-      "relative_time": 54.993882179260254
+      "timestamp": "2025-01-05T23:58:56.837399",
+      "cpu_percent": 39.78,
+      "ram_percent": 77.6,
+      "ram_used_gb": 49.3748664855957,
+      "gpu_memory_used": 6902.0,
+      "relative_time": 55.71548247337341
     },
     {
-      "timestamp": "2025-01-04T02:38:48.178826",
-      "cpu_percent": 16.46,
-      "ram_percent": 54.7,
-      "ram_used_gb": 34.784278869628906,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 56.064146518707275
+      "timestamp": "2025-01-05T23:58:58.041708",
+      "cpu_percent": 67.91,
+      "ram_percent": 77.6,
+      "ram_used_gb": 49.35768127441406,
+      "gpu_memory_used": 6908.0,
+      "relative_time": 56.83315873146057
     },
     {
-      "timestamp": "2025-01-04T02:38:49.235997",
-      "cpu_percent": 12.84,
-      "ram_percent": 54.7,
-      "ram_used_gb": 34.79767608642578,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 57.12374472618103
+      "timestamp": "2025-01-05T23:58:59.158028",
+      "cpu_percent": 55.69,
+      "ram_percent": 77.6,
+      "ram_used_gb": 49.38890838623047,
+      "gpu_memory_used": 6920.0,
+      "relative_time": 57.956458568573
     },
     {
-      "timestamp": "2025-01-04T02:38:50.295962",
-      "cpu_percent": 15.69,
-      "ram_percent": 54.8,
-      "ram_used_gb": 34.8546257019043,
-      "gpu_memory_used": 4013.0,
-      "relative_time": 58.180296421051025
+      "timestamp": "2025-01-05T23:59:00.281669",
+      "cpu_percent": 48.11,
+      "ram_percent": 77.6,
+      "ram_used_gb": 49.3576545715332,
+      "gpu_memory_used": 6921.0,
+      "relative_time": 59.03302216529846
     },
     {
-      "timestamp": "2025-01-04T02:38:51.357678",
-      "cpu_percent": 14.54,
-      "ram_percent": 54.8,
-      "ram_used_gb": 34.8900260925293,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 59.242270708084106
-    },
-    {
-      "timestamp": "2025-01-04T02:38:52.415380",
-      "cpu_percent": 14.74,
-      "ram_percent": 54.9,
-      "ram_used_gb": 34.92173767089844,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 60.307114601135254
-    },
-    {
-      "timestamp": "2025-01-04T02:38:53.490598",
-      "cpu_percent": 13.82,
-      "ram_percent": 55.1,
-      "ram_used_gb": 35.028907775878906,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 61.37576389312744
-    },
-    {
-      "timestamp": "2025-01-04T02:38:54.548660",
-      "cpu_percent": 11.31,
-      "ram_percent": 55.1,
-      "ram_used_gb": 35.05375289916992,
-      "gpu_memory_used": 4011.0,
-      "relative_time": 62.43392610549927
-    },
-    {
-      "timestamp": "2025-01-04T02:38:55.609900",
-      "cpu_percent": 14.35,
-      "ram_percent": 55.1,
-      "ram_used_gb": 35.03831100463867,
-      "gpu_memory_used": 4011.0,
-      "relative_time": 63.493370056152344
-    },
-    {
-      "timestamp": "2025-01-04T02:38:56.666032",
-      "cpu_percent": 13.11,
-      "ram_percent": 55.1,
-      "ram_used_gb": 35.07795333862305,
-      "gpu_memory_used": 4011.0,
-      "relative_time": 64.54955720901489
-    },
-    {
-      "timestamp": "2025-01-04T02:38:57.730782",
-      "cpu_percent": 16.01,
-      "ram_percent": 55.2,
-      "ram_used_gb": 35.11598587036133,
-      "gpu_memory_used": 4007.0,
-      "relative_time": 65.61445665359497
-    },
-    {
-      "timestamp": "2025-01-04T02:38:58.787051",
-      "cpu_percent": 13.68,
-      "ram_percent": 55.3,
-      "ram_used_gb": 35.15185546875,
-      "gpu_memory_used": 4007.0,
-      "relative_time": 66.67095923423767
-    },
-    {
-      "timestamp": "2025-01-04T02:38:59.847606",
-      "cpu_percent": 13.08,
-      "ram_percent": 55.3,
-      "ram_used_gb": 35.183753967285156,
-      "gpu_memory_used": 4003.0,
-      "relative_time": 67.73307466506958
-    },
-    {
-      "timestamp": "2025-01-04T02:39:00.918871",
-      "cpu_percent": 13.26,
-      "ram_percent": 55.4,
-      "ram_used_gb": 35.22275161743164,
-      "gpu_memory_used": 4005.0,
-      "relative_time": 68.80590057373047
-    },
-    {
-      "timestamp": "2025-01-04T02:39:01.991170",
-      "cpu_percent": 11.05,
-      "ram_percent": 55.3,
-      "ram_used_gb": 35.15507125854492,
-      "gpu_memory_used": 4005.0,
-      "relative_time": 69.8813705444336
-    },
-    {
-      "timestamp": "2025-01-04T02:39:03.058942",
-      "cpu_percent": 11.08,
-      "ram_percent": 55.4,
-      "ram_used_gb": 35.2095947265625,
-      "gpu_memory_used": 4005.0,
-      "relative_time": 70.94484400749207
-    },
-    {
-      "timestamp": "2025-01-04T02:39:04.116159",
-      "cpu_percent": 12.74,
-      "ram_percent": 55.4,
-      "ram_used_gb": 35.24392318725586,
-      "gpu_memory_used": 4005.0,
-      "relative_time": 72.00281810760498
-    },
-    {
-      "timestamp": "2025-01-04T02:39:05.187410",
-      "cpu_percent": 11.04,
-      "ram_percent": 55.4,
-      "ram_used_gb": 35.259830474853516,
-      "gpu_memory_used": 4005.0,
-      "relative_time": 73.07217526435852
-    },
-    {
-      "timestamp": "2025-01-04T02:39:06.248588",
-      "cpu_percent": 13.66,
-      "ram_percent": 55.5,
-      "ram_used_gb": 35.29854965209961,
-      "gpu_memory_used": 4005.0,
-      "relative_time": 74.13533973693848
-    },
-    {
-      "timestamp": "2025-01-04T02:39:07.321179",
-      "cpu_percent": 11.16,
-      "ram_percent": 55.6,
-      "ram_used_gb": 35.346981048583984,
-      "gpu_memory_used": 4005.0,
-      "relative_time": 75.18772435188293
-    },
-    {
-      "timestamp": "2025-01-04T02:39:08.362496",
-      "cpu_percent": 9.46,
-      "ram_percent": 56.1,
-      "ram_used_gb": 35.69393539428711,
-      "gpu_memory_used": 4006.0,
-      "relative_time": 76.25136637687683
-    },
-    {
-      "timestamp": "2025-01-04T02:39:09.438218",
-      "cpu_percent": 15.88,
-      "ram_percent": 56.1,
-      "ram_used_gb": 35.6658821105957,
-      "gpu_memory_used": 4006.0,
-      "relative_time": 77.32329249382019
-    },
-    {
-      "timestamp": "2025-01-04T02:39:10.495653",
-      "cpu_percent": 13.25,
-      "ram_percent": 56.1,
-      "ram_used_gb": 35.658119201660156,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 78.38210940361023
-    },
-    {
-      "timestamp": "2025-01-04T02:39:11.560094",
-      "cpu_percent": 10.8,
-      "ram_percent": 56.1,
-      "ram_used_gb": 35.694610595703125,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 79.44816374778748
-    },
-    {
-      "timestamp": "2025-01-04T02:39:12.621879",
-      "cpu_percent": 12.58,
-      "ram_percent": 56.1,
-      "ram_used_gb": 35.68545150756836,
-      "gpu_memory_used": 4014.0,
-      "relative_time": 80.51017951965332
-    },
-    {
-      "timestamp": "2025-01-04T02:39:13.692834",
-      "cpu_percent": 13.3,
-      "ram_percent": 56.2,
-      "ram_used_gb": 35.730979919433594,
-      "gpu_memory_used": 4012.0,
-      "relative_time": 81.57789969444275
-    },
-    {
-      "timestamp": "2025-01-04T02:39:14.753401",
-      "cpu_percent": 14.75,
-      "ram_percent": 56.2,
-      "ram_used_gb": 35.73103332519531,
-      "gpu_memory_used": 4012.0,
-      "relative_time": 82.63830900192261
-    },
-    {
-      "timestamp": "2025-01-04T02:39:15.811385",
-      "cpu_percent": 14.69,
-      "ram_percent": 56.2,
-      "ram_used_gb": 35.740108489990234,
-      "gpu_memory_used": 4011.0,
-      "relative_time": 83.69796371459961
-    },
-    {
-      "timestamp": "2025-01-04T02:39:16.874197",
-      "cpu_percent": 14.28,
-      "ram_percent": 56.2,
-      "ram_used_gb": 35.767982482910156,
-      "gpu_memory_used": 4010.0,
-      "relative_time": 84.76145887374878
-    },
-    {
-      "timestamp": "2025-01-04T02:39:17.936944",
-      "cpu_percent": 12.99,
-      "ram_percent": 56.3,
-      "ram_used_gb": 35.81233596801758,
-      "gpu_memory_used": 4010.0,
-      "relative_time": 85.84119439125061
-    },
-    {
-      "timestamp": "2025-01-04T02:39:19.027320",
-      "cpu_percent": 12.79,
-      "ram_percent": 56.6,
-      "ram_used_gb": 36.0085563659668,
-      "gpu_memory_used": 4010.0,
-      "relative_time": 86.91442775726318
-    },
-    {
-      "timestamp": "2025-01-04T02:39:20.088804",
-      "cpu_percent": 15.62,
-      "ram_percent": 56.9,
-      "ram_used_gb": 36.16616439819336,
-      "gpu_memory_used": 4006.0,
-      "relative_time": 88.00494360923767
-    },
-    {
-      "timestamp": "2025-01-04T02:39:21.191796",
-      "cpu_percent": 12.98,
-      "ram_percent": 57.1,
-      "ram_used_gb": 36.3217658996582,
-      "gpu_memory_used": 4005.0,
-      "relative_time": 89.12522411346436
-    },
-    {
-      "timestamp": "2025-01-04T02:39:22.311508",
-      "cpu_percent": 14.77,
-      "ram_percent": 56.9,
-      "ram_used_gb": 36.191429138183594,
-      "gpu_memory_used": 4005.0,
-      "relative_time": 90.19932198524475
-    },
-    {
-      "timestamp": "2025-01-04T02:39:23.372871",
-      "cpu_percent": 12.98,
-      "ram_percent": 57.1,
-      "ram_used_gb": 36.29658889770508,
-      "gpu_memory_used": 4005.0,
-      "relative_time": 91.3045928478241
-    },
-    {
-      "timestamp": "2025-01-04T02:39:24.479087",
-      "cpu_percent": 14.64,
-      "ram_percent": 57.1,
-      "ram_used_gb": 36.30413055419922,
-      "gpu_memory_used": 3998.0,
-      "relative_time": 92.36360597610474
-    },
-    {
-      "timestamp": "2025-01-04T02:39:25.537969",
-      "cpu_percent": 14.51,
-      "ram_percent": 57.1,
-      "ram_used_gb": 36.311763763427734,
-      "gpu_memory_used": 3998.0,
-      "relative_time": 93.42230415344238
-    },
-    {
-      "timestamp": "2025-01-04T02:39:26.594967",
-      "cpu_percent": 13.05,
-      "ram_percent": 57.1,
-      "ram_used_gb": 36.351402282714844,
-      "gpu_memory_used": 3998.0,
-      "relative_time": 94.47847175598145
-    },
-    {
-      "timestamp": "2025-01-04T02:39:27.652223",
-      "cpu_percent": 15.05,
-      "ram_percent": 57.2,
-      "ram_used_gb": 36.36949157714844,
-      "gpu_memory_used": 4004.0,
-      "relative_time": 95.53560948371887
-    },
-    {
-      "timestamp": "2025-01-04T02:39:28.708008",
-      "cpu_percent": 12.51,
-      "ram_percent": 57.2,
-      "ram_used_gb": 36.3841667175293,
-      "gpu_memory_used": 4004.0,
-      "relative_time": 96.59472155570984
-    },
-    {
-      "timestamp": "2025-01-04T02:39:29.768866",
-      "cpu_percent": 10.83,
-      "ram_percent": 57.2,
-      "ram_used_gb": 36.39939880371094,
-      "gpu_memory_used": 4004.0,
-      "relative_time": 97.6679356098175
-    },
-    {
-      "timestamp": "2025-01-04T02:39:30.844295",
-      "cpu_percent": 14.61,
-      "ram_percent": 57.3,
-      "ram_used_gb": 36.42519760131836,
-      "gpu_memory_used": 4004.0,
-      "relative_time": 98.74996089935303
-    },
-    {
-      "timestamp": "2025-01-04T02:39:31.934080",
-      "cpu_percent": 11.74,
-      "ram_percent": 57.0,
-      "ram_used_gb": 36.271087646484375,
-      "gpu_memory_used": 4004.0,
-      "relative_time": 99.81860518455505
-    },
-    {
-      "timestamp": "2025-01-04T02:39:32.989954",
-      "cpu_percent": 12.09,
-      "ram_percent": 57.2,
-      "ram_used_gb": 36.368350982666016,
-      "gpu_memory_used": 4010.0,
-      "relative_time": 100.87712931632996
-    },
-    {
-      "timestamp": "2025-01-04T02:39:34.061411",
-      "cpu_percent": 11.07,
-      "ram_percent": 57.2,
-      "ram_used_gb": 36.38072967529297,
-      "gpu_memory_used": 4010.0,
-      "relative_time": 101.946035861969
-    },
-    {
-      "timestamp": "2025-01-04T02:39:35.117182",
-      "cpu_percent": 9.32,
-      "ram_percent": 57.2,
-      "ram_used_gb": 36.367733001708984,
-      "gpu_memory_used": 4415.0,
-      "relative_time": 103.00355505943298
-    },
-    {
-      "timestamp": "2025-01-04T02:39:36.179256",
-      "cpu_percent": 12.93,
-      "ram_percent": 57.2,
-      "ram_used_gb": 36.396636962890625,
-      "gpu_memory_used": 4417.0,
-      "relative_time": 104.06347131729126
-    },
-    {
-      "timestamp": "2025-01-04T02:39:37.237454",
-      "cpu_percent": 10.94,
-      "ram_percent": 57.3,
-      "ram_used_gb": 36.429630279541016,
-      "gpu_memory_used": 4417.0,
-      "relative_time": 105.12580728530884
-    },
-    {
-      "timestamp": "2025-01-04T02:39:38.310321",
-      "cpu_percent": 12.86,
-      "ram_percent": 57.3,
-      "ram_used_gb": 36.44291305541992,
-      "gpu_memory_used": 4418.0,
-      "relative_time": 106.17753839492798
-    },
-    {
-      "timestamp": "2025-01-04T02:39:39.355358",
-      "cpu_percent": 10.82,
-      "ram_percent": 57.3,
-      "ram_used_gb": 36.46603012084961,
-      "gpu_memory_used": 4418.0,
-      "relative_time": 107.24251008033752
-    },
-    {
-      "timestamp": "2025-01-04T02:39:40.413524",
-      "cpu_percent": 14.64,
-      "ram_percent": 57.4,
-      "ram_used_gb": 36.507179260253906,
-      "gpu_memory_used": 4418.0,
-      "relative_time": 108.29774165153503
-    },
-    {
-      "timestamp": "2025-01-04T02:39:41.482368",
-      "cpu_percent": 10.03,
-      "ram_percent": 58.1,
-      "ram_used_gb": 36.93812942504883,
-      "gpu_memory_used": 4418.0,
-      "relative_time": 109.36836910247803
-    },
-    {
-      "timestamp": "2025-01-04T02:39:42.546204",
-      "cpu_percent": 12.63,
-      "ram_percent": 58.0,
-      "ram_used_gb": 36.87542724609375,
-      "gpu_memory_used": 4418.0,
-      "relative_time": 110.43055510520935
-    },
-    {
-      "timestamp": "2025-01-04T02:39:43.604666",
-      "cpu_percent": 14.14,
-      "ram_percent": 58.0,
-      "ram_used_gb": 36.875328063964844,
-      "gpu_memory_used": 4426.0,
-      "relative_time": 111.49229407310486
-    },
-    {
-      "timestamp": "2025-01-04T02:39:44.664973",
-      "cpu_percent": 10.64,
-      "ram_percent": 58.0,
-      "ram_used_gb": 36.88217544555664,
-      "gpu_memory_used": 4425.0,
-      "relative_time": 112.55481696128845
-    },
-    {
-      "timestamp": "2025-01-04T02:39:45.741305",
-      "cpu_percent": 13.92,
-      "ram_percent": 57.9,
-      "ram_used_gb": 36.85449981689453,
-      "gpu_memory_used": 4425.0,
-      "relative_time": 113.62504053115845
-    },
-    {
-      "timestamp": "2025-01-04T02:39:46.799656",
-      "cpu_percent": 13.15,
-      "ram_percent": 58.0,
-      "ram_used_gb": 36.870826721191406,
-      "gpu_memory_used": 4423.0,
-      "relative_time": 114.6845052242279
-    },
-    {
-      "timestamp": "2025-01-04T02:39:47.859725",
-      "cpu_percent": 13.85,
-      "ram_percent": 58.0,
-      "ram_used_gb": 36.870948791503906,
-      "gpu_memory_used": 4423.0,
-      "relative_time": 115.74664235115051
-    },
-    {
-      "timestamp": "2025-01-04T02:39:48.919071",
-      "cpu_percent": 14.59,
-      "ram_percent": 58.0,
-      "ram_used_gb": 36.886802673339844,
-      "gpu_memory_used": 4422.0,
-      "relative_time": 116.80267906188965
-    },
-    {
-      "timestamp": "2025-01-04T02:39:49.976541",
-      "cpu_percent": 16.56,
-      "ram_percent": 58.0,
-      "ram_used_gb": 36.90068435668945,
-      "gpu_memory_used": 4422.0,
-      "relative_time": 117.86520886421204
-    },
-    {
-      "timestamp": "2025-01-04T02:39:51.036593",
-      "cpu_percent": 10.33,
-      "ram_percent": 58.1,
-      "ram_used_gb": 36.96821212768555,
-      "gpu_memory_used": 4416.0,
-      "relative_time": 118.92232513427734
-    },
-    {
-      "timestamp": "2025-01-04T02:39:52.098186",
-      "cpu_percent": 15.23,
-      "ram_percent": 58.1,
-      "ram_used_gb": 36.96358108520508,
-      "gpu_memory_used": 4416.0,
-      "relative_time": 119.98378920555115
-    },
-    {
-      "timestamp": "2025-01-04T02:39:53.168566",
-      "cpu_percent": 11.96,
-      "ram_percent": 58.2,
-      "ram_used_gb": 37.00669479370117,
-      "gpu_memory_used": 4416.0,
-      "relative_time": 121.05223441123962
-    },
-    {
-      "timestamp": "2025-01-04T02:39:54.230292",
-      "cpu_percent": 13.03,
-      "ram_percent": 58.2,
-      "ram_used_gb": 37.031307220458984,
-      "gpu_memory_used": 4416.0,
-      "relative_time": 122.11563086509705
-    },
-    {
-      "timestamp": "2025-01-04T02:39:55.287670",
-      "cpu_percent": 14.93,
-      "ram_percent": 58.2,
-      "ram_used_gb": 37.045589447021484,
-      "gpu_memory_used": 4416.0,
-      "relative_time": 123.17237305641174
-    },
-    {
-      "timestamp": "2025-01-04T02:39:56.349816",
-      "cpu_percent": 12.91,
-      "ram_percent": 58.3,
-      "ram_used_gb": 37.072689056396484,
-      "gpu_memory_used": 4416.0,
-      "relative_time": 124.23265671730042
-    },
-    {
-      "timestamp": "2025-01-04T02:39:57.409384",
-      "cpu_percent": 11.35,
-      "ram_percent": 58.3,
-      "ram_used_gb": 37.10455322265625,
-      "gpu_memory_used": 4416.0,
-      "relative_time": 125.29221749305725
-    },
-    {
-      "timestamp": "2025-01-04T02:39:58.464653",
-      "cpu_percent": 12.97,
-      "ram_percent": 58.4,
-      "ram_used_gb": 37.12955093383789,
-      "gpu_memory_used": 4416.0,
-      "relative_time": 126.34849739074707
-    },
-    {
-      "timestamp": "2025-01-04T02:39:59.521737",
-      "cpu_percent": 11.69,
-      "ram_percent": 58.4,
-      "ram_used_gb": 37.139190673828125,
-      "gpu_memory_used": 4416.0,
-      "relative_time": 127.40602111816406
-    },
-    {
-      "timestamp": "2025-01-04T02:40:00.581455",
-      "cpu_percent": 12.86,
-      "ram_percent": 58.5,
-      "ram_used_gb": 37.204673767089844,
-      "gpu_memory_used": 4418.0,
-      "relative_time": 128.5798671245575
-    },
-    {
-      "timestamp": "2025-01-04T02:40:01.760893",
-      "cpu_percent": 81.59,
-      "ram_percent": 58.1,
-      "ram_used_gb": 36.97315216064453,
-      "gpu_memory_used": 4425.0,
-      "relative_time": 129.6643455028534
-    },
-    {
-      "timestamp": "2025-01-04T02:40:02.850000",
-      "cpu_percent": 14.55,
-      "ram_percent": 58.2,
-      "ram_used_gb": 37.0354118347168,
-      "gpu_memory_used": 4435.0,
-      "relative_time": 130.7529788017273
-    },
-    {
-      "timestamp": "2025-01-04T02:40:03.934586",
-      "cpu_percent": 13.16,
-      "ram_percent": 58.2,
-      "ram_used_gb": 37.03787612915039,
-      "gpu_memory_used": 4437.0,
-      "relative_time": 131.81812405586243
-    },
-    {
-      "timestamp": "2025-01-04T02:40:04.989915",
-      "cpu_percent": 11.0,
-      "ram_percent": 58.3,
-      "ram_used_gb": 37.09538650512695,
-      "gpu_memory_used": 4437.0,
-      "relative_time": 132.88244915008545
-    },
-    {
-      "timestamp": "2025-01-04T02:40:06.067559",
-      "cpu_percent": 32.79,
-      "ram_percent": 58.3,
-      "ram_used_gb": 37.07184982299805,
-      "gpu_memory_used": 4437.0,
-      "relative_time": 133.97513842582703
-    },
-    {
-      "timestamp": "2025-01-04T02:40:07.156881",
-      "cpu_percent": 22.34,
-      "ram_percent": 58.3,
-      "ram_used_gb": 37.07517623901367,
-      "gpu_memory_used": 4438.0,
-      "relative_time": 135.04176831245422
-    },
-    {
-      "timestamp": "2025-01-04T02:40:08.212791",
-      "cpu_percent": 14.86,
-      "ram_percent": 58.3,
-      "ram_used_gb": 37.08013153076172,
-      "gpu_memory_used": 4438.0,
-      "relative_time": 136.1427457332611
-    },
-    {
-      "timestamp": "2025-01-04T02:40:09.317884",
-      "cpu_percent": 21.55,
-      "ram_percent": 58.4,
-      "ram_used_gb": 37.144142150878906,
-      "gpu_memory_used": 4447.0,
-      "relative_time": 137.20455360412598
-    },
-    {
-      "timestamp": "2025-01-04T02:40:10.390292",
-      "cpu_percent": 26.97,
-      "ram_percent": 58.4,
-      "ram_used_gb": 37.141868591308594,
-      "gpu_memory_used": 4454.0,
-      "relative_time": 138.2930736541748
-    },
-    {
-      "timestamp": "2025-01-04T02:40:11.464548",
-      "cpu_percent": 12.21,
-      "ram_percent": 58.5,
-      "ram_used_gb": 37.205867767333984,
-      "gpu_memory_used": 4451.0,
-      "relative_time": 139.35198616981506
-    },
-    {
-      "timestamp": "2025-01-04T02:40:12.537687",
-      "cpu_percent": 13.14,
-      "ram_percent": 58.5,
-      "ram_used_gb": 37.233299255371094,
-      "gpu_memory_used": 4452.0,
-      "relative_time": 140.4236707687378
-    },
-    {
-      "timestamp": "2025-01-04T02:40:13.608158",
-      "cpu_percent": 13.93,
-      "ram_percent": 58.6,
-      "ram_used_gb": 37.25624465942383,
-      "gpu_memory_used": 4452.0,
-      "relative_time": 141.4932518005371
-    },
-    {
-      "timestamp": "2025-01-04T02:40:14.668199",
-      "cpu_percent": 15.76,
-      "ram_percent": 58.6,
-      "ram_used_gb": 37.278499603271484,
-      "gpu_memory_used": 4452.0,
-      "relative_time": 142.57055759429932
-    },
-    {
-      "timestamp": "2025-01-04T02:40:15.754506",
-      "cpu_percent": 14.59,
-      "ram_percent": 58.5,
-      "ram_used_gb": 37.21017837524414,
-      "gpu_memory_used": 4451.0,
-      "relative_time": 143.64187097549438
-    },
+      "timestamp": "2025-01-05T23:59:01.358046",
+      "cpu_percent": 34.9,
+      "ram_percent": 77.6,
+      "ram_used_gb": 49.3621711730957,
+      "gpu_memory_used": 6921.0,
+      "relative_time": 60.10921263694763
+    },
     {
-      "timestamp": "2025-01-04T02:40:16.827392",
-      "cpu_percent": 17.55,
-      "ram_percent": 58.4,
-      "ram_used_gb": 37.1205940246582,
-      "gpu_memory_used": 4450.0,
-      "relative_time": 144.75147438049316
-    },
+      "timestamp": "2025-01-05T23:59:02.434159",
+      "cpu_percent": 30.19,
+      "ram_percent": 77.6,
+      "ram_used_gb": 49.35695266723633,
+      "gpu_memory_used": 6919.0,
+      "relative_time": 61.16992497444153
+    },
     {
-      "timestamp": "2025-01-04T02:40:17.929858",
-      "cpu_percent": 55.09,
-      "ram_percent": 58.2,
-      "ram_used_gb": 37.04216766357422,
-      "gpu_memory_used": 4449.0,
-      "relative_time": 145.815936088562
-    },
+      "timestamp": "2025-01-05T23:59:03.495845",
+      "cpu_percent": 33.29,
+      "ram_percent": 77.9,
+      "ram_used_gb": 49.53657531738281,
+      "gpu_memory_used": 6916.0,
+      "relative_time": 62.276703119277954
+    },
     {
-      "timestamp": "2025-01-04T02:40:18.988009",
-      "cpu_percent": 13.92,
-      "ram_percent": 58.0,
-      "ram_used_gb": 36.90068054199219,
-      "gpu_memory_used": 4447.0,
-      "relative_time": 146.8880341053009
-    },
+      "timestamp": "2025-01-05T23:59:04.602367",
+      "cpu_percent": 43.51,
+      "ram_percent": 77.9,
+      "ram_used_gb": 49.58420181274414,
+      "gpu_memory_used": 6919.0,
+      "relative_time": 63.39539980888367
+    },
     {
-      "timestamp": "2025-01-04T02:40:20.062567",
-      "cpu_percent": 17.42,
-      "ram_percent": 57.7,
-      "ram_used_gb": 36.69455337524414,
-      "gpu_memory_used": 4446.0,
-      "relative_time": 147.96440315246582
-    },
+      "timestamp": "2025-01-05T23:59:05.720517",
+      "cpu_percent": 40.01,
+      "ram_percent": 78.1,
+      "ram_used_gb": 49.65230941772461,
+      "gpu_memory_used": 6919.0,
+      "relative_time": 64.45865726470947
+    },
     {
-      "timestamp": "2025-01-04T02:40:21.149129",
-      "cpu_percent": 14.78,
-      "ram_percent": 57.4,
-      "ram_used_gb": 36.50687789916992,
-      "gpu_memory_used": 4445.0,
-      "relative_time": 149.041100025177
-    },
+      "timestamp": "2025-01-05T23:59:06.783467",
+      "cpu_percent": 30.16,
+      "ram_percent": 78.1,
+      "ram_used_gb": 49.673973083496094,
+      "gpu_memory_used": 6918.0,
+      "relative_time": 65.54732704162598
+    },
     {
-      "timestamp": "2025-01-04T02:40:22.221780",
-      "cpu_percent": 11.1,
-      "ram_percent": 57.0,
-      "ram_used_gb": 36.28267288208008,
-      "gpu_memory_used": 4438.0,
-      "relative_time": 150.125506401062
-    },
+      "timestamp": "2025-01-05T23:59:07.872827",
+      "cpu_percent": 22.64,
+      "ram_percent": 78.0,
+      "ram_used_gb": 49.64229202270508,
+      "gpu_memory_used": 6918.0,
+      "relative_time": 66.6279137134552
+    },
     {
-      "timestamp": "2025-01-04T02:40:23.308492",
-      "cpu_percent": 12.21,
-      "ram_percent": 56.7,
-      "ram_used_gb": 36.036773681640625,
-      "gpu_memory_used": 4436.0,
-      "relative_time": 151.19524502754211
-    },
+      "timestamp": "2025-01-05T23:59:08.954513",
+      "cpu_percent": 45.52,
+      "ram_percent": 77.9,
+      "ram_used_gb": 49.53507995605469,
+      "gpu_memory_used": 6922.0,
+      "relative_time": 67.7145938873291
+    },
     {
-      "timestamp": "2025-01-04T02:40:24.381177",
-      "cpu_percent": 13.79,
-      "ram_percent": 56.3,
-      "ram_used_gb": 35.83684539794922,
-      "gpu_memory_used": 4436.0,
-      "relative_time": 152.26534175872803
-    },
+      "timestamp": "2025-01-05T23:59:10.039422",
+      "cpu_percent": 40.01,
+      "ram_percent": 77.0,
+      "ram_used_gb": 48.967567443847656,
+      "gpu_memory_used": 6923.0,
+      "relative_time": 68.80349135398865
+    },
     {
-      "timestamp": "2025-01-04T02:40:25.452457",
-      "cpu_percent": 12.28,
-      "ram_percent": 56.4,
-      "ram_used_gb": 35.848087310791016,
-      "gpu_memory_used": 4436.0,
-      "relative_time": 153.33880996704102
-    },
+      "timestamp": "2025-01-05T23:59:11.129835",
+      "cpu_percent": 36.44,
+      "ram_percent": 71.2,
+      "ram_used_gb": 45.274654388427734,
+      "gpu_memory_used": 6923.0,
+      "relative_time": 69.87114715576172
+    },
     {
-      "timestamp": "2025-01-04T02:40:26.521613",
-      "cpu_percent": 12.52,
-      "ram_percent": 56.8,
-      "ram_used_gb": 36.1606330871582,
-      "gpu_memory_used": 4440.0,
-      "relative_time": 154.40920901298523
-    },
+      "timestamp": "2025-01-05T23:59:12.203989",
+      "cpu_percent": 38.48,
+      "ram_percent": 71.5,
+      "ram_used_gb": 45.46482849121094,
+      "gpu_memory_used": 6921.0,
+      "relative_time": 70.97379231452942
+    },
     {
-      "timestamp": "2025-01-04T02:40:27.587547",
-      "cpu_percent": 7.94,
-      "ram_percent": 57.2,
-      "ram_used_gb": 36.37208557128906,
-      "gpu_memory_used": 4440.0,
-      "relative_time": 155.46942234039307
-    },
+      "timestamp": "2025-01-05T23:59:13.331638",
+      "cpu_percent": 29.37,
+      "ram_percent": 71.4,
+      "ram_used_gb": 45.428951263427734,
+      "gpu_memory_used": 6924.0,
+      "relative_time": 72.11824035644531
+    },
     {
-      "timestamp": "2025-01-04T02:40:28.647400",
-      "cpu_percent": 8.85,
-      "ram_percent": 57.3,
-      "ram_used_gb": 36.470054626464844,
-      "gpu_memory_used": 4440.0,
-      "relative_time": 156.53129720687866
+      "timestamp": "2025-01-05T23:59:14.510728",
+      "cpu_percent": 29.0,
+      "ram_percent": 71.1,
+      "ram_used_gb": 45.226200103759766,
+      "gpu_memory_used": 6916.0,
+      "relative_time": 73.27950477600098
     }
   ],
-  "test_duration": 159.19756031036377
+  "test_duration": 76.54887413978577
 }
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt
index cb2df6a..91be8e5 100644
--- a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt
+++ b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt
@@ -1,23 +1,23 @@
 === Benchmark Statistics (with correct RTF) ===
 
-Total tokens processed: 17150
-Total audio generated (s): 5296.38
-Total test duration (s): 155.23
-Average processing rate (tokens/s): 102.86
-Average RTF: 0.03
-Average Real Time Speed: 31.25
+Total tokens processed: 3150
+Total audio generated (s): 1056.03
+Total test duration (s): 70.20
+Average processing rate (tokens/s): 46.46
+Average RTF: 0.07
+Average Real Time Speed: 15.00
 
 === Per-chunk Stats ===
 
-Average chunk size (tokens): 1715.00
+Average chunk size (tokens): 525.00
 Min chunk size (tokens): 150
-Max chunk size (tokens): 5000
-Average processing time (s): 15.39
-Average output length (s): 529.64
+Max chunk size (tokens): 900
+Average processing time (s): 11.57
+Average output length (s): 176.00
 
 === Performance Ranges ===
 
-Processing rate range (tokens/s): 80.65 - 125.10
-RTF range: 0.03x - 0.04x
-Real Time Speed range: 25.00x - 33.33x
+Processing rate range (tokens/s): 40.07 - 53.57
+RTF range: 0.06x - 0.08x
+Real Time Speed range: 12.50x - 16.67x
 
diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png
index 339c896..2317c52 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png
index 3e5c8d1..919c53b 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png
index e209978..5143bda 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_latency.png b/examples/assorted_checks/benchmarks/output_plots/first_token_latency.png
deleted file mode 100644
index d969fb8..0000000
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_latency.png and /dev/null differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream.png b/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream.png
index 4ed4b65..75c1a19 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream.png and b/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream_openai.png b/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream_openai.png
index 56f6e31..c78f28c 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream_openai.png and b/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream_openai.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline.png b/examples/assorted_checks/benchmarks/output_plots/first_token_timeline.png
deleted file mode 100644
index 251b172..0000000
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline.png and /dev/null differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream.png b/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream.png
index dd7162c..47ac017 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream.png and b/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream_openai.png b/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream_openai.png
index 4b36a87..64f2208 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream_openai.png and b/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream_openai.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/format_comparison.png b/examples/assorted_checks/benchmarks/output_plots/format_comparison.png
deleted file mode 100644
index 95ac515..0000000
Binary files a/examples/assorted_checks/benchmarks/output_plots/format_comparison.png and /dev/null differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png
index 62c6864..ebf7bcf 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png
index 1c5d7b7..bcdacda 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png
index 942b3a8..a6a6ea5 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_usage.png b/examples/assorted_checks/benchmarks/output_plots/gpu_usage.png
deleted file mode 100644
index 1bc44dc..0000000
Binary files a/examples/assorted_checks/benchmarks/output_plots/gpu_usage.png and /dev/null differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream.png b/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream.png
index e595fff..4e94aaa 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream.png and b/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream_openai.png b/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream_openai.png
index 47cfbde..05088c3 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream_openai.png and b/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream_openai.png differ
diff --git a/examples/assorted_checks/generate_readme_plots.py b/examples/assorted_checks/generate_readme_plots.py
new file mode 100644
index 0000000..a6e5da2
--- /dev/null
+++ b/examples/assorted_checks/generate_readme_plots.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+"""Script to generate all plots needed for the README.
+
+Runs the format, voice-analysis, RTF and streaming benchmark scripts, copies
+the plots they write into ``<project_root>/assets`` and prints a validation
+report for one WAV output of each step.
+"""
+
+import os
+import sys
+import time
+import shutil
+from pathlib import Path
+
+# Get absolute paths
+script_dir = Path(__file__).parent.resolve()
+project_root = script_dir.parent.parent
+
+# Add directories to Python path for imports
+sys.path.append(str(script_dir))
+sys.path.append(str(script_dir / "benchmarks"))
+
+# Import test scripts. validate_wav is imported after the path setup so it is
+# also found when neither directory above is already on sys.path.
+from validate_wav import validate_tts
+from benchmark_tts_rtf import main as benchmark_rtf
+from test_formats.test_audio_formats import main as test_formats
+from benchmark_first_token_stream_unified import main as benchmark_stream
+from test_combinations.test_analyze_combined_voices import main as test_voice_analysis
+
+# Remove directories from path after imports
+sys.path.remove(str(script_dir))
+sys.path.remove(str(script_dir / "benchmarks"))
+
+
+def ensure_assets_dir():
+    """Create the assets directory under the project root if it doesn't exist.
+
+    Returns:
+        Path of the assets directory.
+    """
+    assets_dir = project_root / "assets"
+    assets_dir.mkdir(exist_ok=True)
+    return assets_dir
+
+
+def copy_plot(src_path: str, dest_name: str, assets_dir: Path):
+    """Copy a plot to the assets directory with a new name.
+
+    A missing source only prints a warning, so one absent plot does not stop
+    the remaining steps.
+
+    Args:
+        src_path: Path of the generated plot.
+        dest_name: File name of the copy inside ``assets_dir``.
+        assets_dir: Destination directory.
+    """
+    if not os.path.exists(src_path):
+        print(f"Warning: Source plot not found at {src_path}")
+        return
+
+    dest_path = assets_dir / dest_name
+    shutil.copy2(src_path, dest_path)
+    print(f"Copied {src_path} to {dest_path}")
+
+
+def validate_and_print(wav_path: str, category: str):
+    """Validate a WAV file with ``validate_tts`` and print the results.
+
+    Args:
+        wav_path: Path of the WAV file to check; a missing file only
+            prints a warning.
+        category: Label used in the report heading.
+    """
+    if not os.path.exists(wav_path):
+        print(f"Warning: WAV file not found at {wav_path}")
+        return
+
+    print(f"\n=== Validating {category} Audio ===")
+    result = validate_tts(wav_path)
+
+    if "error" in result:
+        print(f"Error: {result['error']}")
+        return
+
+    print(f"Duration: {result['duration']}")
+    print(f"Sample Rate: {result['sample_rate']} Hz")
+    print(f"Peak Amplitude: {result['peak_amplitude']}")
+    print(f"RMS Level: {result['rms_level']}")
+
+    if result["issues"]:
+        print("\nIssues Found:")
+        for issue in result["issues"]:
+            print(f"- {issue}")
+    else:
+        print("\nNo issues found")
+
+
+def main(prefix: str = "gpu"):
+    """Generate all plots needed for the README.
+
+    Args:
+        prefix: Device prefix of the RTF benchmark plots to pick up; it is
+            also prepended to the name of every benchmark plot copied into
+            the assets directory.
+    """
+    # Ensure assets directory exists
+    assets_dir = ensure_assets_dir()
+    plots_dir = script_dir / "benchmarks/output_plots"
+
+    print("\n=== Generating Format Comparison Plot ===")
+    test_formats()
+    copy_plot(
+        str(script_dir / "test_formats/output/test_formats/format_comparison.png"),
+        "format_comparison.png",
+        assets_dir,
+    )
+    # Validate WAV output from format test
+    # NOTE(review): the final check below reads test_audio.wav from this same
+    # directory; confirm speech.wav is really written by the format test.
+    validate_and_print(
+        str(script_dir / "test_formats/output/test_formats/speech.wav"),
+        "Format Test WAV",
+    )
+
+    print("\n=== Generating Voice Analysis Plot ===")
+    test_voice_analysis()
+    copy_plot(
+        str(script_dir / "test_combinations/output/analysis_comparison.png"),
+        "voice_analysis.png",
+        assets_dir,
+    )
+    # Validate combined voice output
+    validate_and_print(
+        str(
+            script_dir
+            / "test_combinations/output/analysis_combined_af_bella_af_nicole.wav"
+        ),
+        "Combined Voice",
+    )
+
+    print("\n=== Generating Performance Benchmark Plots ===")
+    benchmark_rtf()
+    for metric in ("processing_time", "realtime_factor"):
+        copy_plot(
+            str(plots_dir / f"{prefix}_{metric}_rtf.png"),
+            f"{prefix}_{metric}.png",
+            assets_dir,
+        )
+    # Validate RTF benchmark output (450-token chunk)
+    validate_and_print(
+        str(script_dir / "benchmarks/output_audio/chunk_450_tokens.wav"),
+        "RTF Benchmark",
+    )
+
+    print("\n=== Generating Streaming Benchmark Plots ===")
+    benchmark_stream()
+
+    # Copy the direct streaming plots, then the OpenAI streaming plots
+    stream_metrics = (
+        "first_token_latency",
+        "first_token_timeline",
+        "total_time_latency",
+    )
+    variants = (("stream", "direct"), ("stream_openai", "openai"))
+    for src_suffix, dest_suffix in variants:
+        for metric in stream_metrics:
+            copy_plot(
+                str(plots_dir / f"{metric}_{src_suffix}.png"),
+                f"{prefix}_{metric}_{dest_suffix}.png",
+                assets_dir,
+            )
+
+    # Wait a moment for files to be generated
+    time.sleep(2)
+
+    # Validate streaming outputs (~500 tokens)
+    validate_and_print(
+        str(
+            script_dir
+            / "benchmarks/output_audio_stream/benchmark_tokens500_run1_stream.wav"
+        ),
+        "Direct Streaming",
+    )
+    validate_and_print(
+        str(
+            script_dir
+            / "benchmarks/output_audio_stream_openai/benchmark_tokens500_run1_stream_openai.wav"
+        ),
+        "OpenAI Streaming",
+    )
+
+    validate_and_print(
+        str(script_dir / "test_formats/output/test_formats/test_audio.wav"),
+        "Format Test WAV",
+    )
+
+    print("\nAll plots have been generated and copied to the assets directory")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py b/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py
index ec280e2..134f554 100644
--- a/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py
+++ b/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py
@@ -73,6 +73,7 @@ def generate_speech(
                 "voice": voice,
                 "speed": 1.0,
                 "response_format": "wav",  # Use WAV for analysis
+                "stream": False,
             },
         )
 
@@ -193,9 +194,10 @@ def plot_analysis(audio_files: Dict[str, str], output_dir: str):
     fig.patch.set_facecolor("#1a1a2e")
     num_files = len(audio_files)
 
-    # Create subplot grid with proper spacing
+    # Create subplot grid with proper spacing for waveforms and metrics
+    total_rows = num_files + 2  # Add one more row for metrics
     gs = plt.GridSpec(
-        num_files + 1, 2, height_ratios=[1.5] * num_files + [1], hspace=0.4, wspace=0.3
+        total_rows, 2, height_ratios=[1.5] * num_files + [1, 1], hspace=0.4, wspace=0.3
     )
 
     # Analyze all files first
@@ -216,48 +218,74 @@ def plot_analysis(audio_files: Dict[str, str], output_dir: str):
     # Colors for voices
     colors = ["#ff2a6d", "#05d9e8", "#d1f7ff"]
 
-    # Create two subplots for metrics with similar scales
-    # Left subplot: Brightness and Volume
-    ax1 = plt.subplot(gs[num_files, 0])
-    metrics1 = [
+    # Create metrics for each subplot
+    metrics = [
         (
-            "Brightness",
-            [chars["spectral_centroid"] / 1000 for chars in all_chars.values()],
-            "kHz",
-        ),
-        ("Volume", [chars["rms"] * 100 for chars in all_chars.values()], "RMS×100"),
-    ]
-
-    # Right subplot: Voice Pitch and Texture
-    ax2 = plt.subplot(gs[num_files, 1])
-    metrics2 = [
-        (
-            "Voice Pitch",
-            [min(chars["dominant_frequencies"]) for chars in all_chars.values()],
-            "Hz",
+            plt.subplot(gs[num_files, 0]),
+            [
+                (
+                    "Volume",
+                    [chars["rms"] * 100 for chars in all_chars.values()],
+                    "RMS×100",
+                )
+            ],
         ),
         (
-            "Texture",
-            [chars["zero_crossing_rate"] * 1000 for chars in all_chars.values()],
-            "ZCR×1000",
+            plt.subplot(gs[num_files, 1]),
+            [
+                (
+                    "Brightness",
+                    [chars["spectral_centroid"] / 1000 for chars in all_chars.values()],
+                    "kHz",
+                )
+            ],
+        ),
+        (
+            plt.subplot(gs[num_files + 1, 0]),
+            [
+                (
+                    "Voice Pitch",
+                    [
+                        min(chars["dominant_frequencies"])
+                        for chars in all_chars.values()
+                    ],
+                    "Hz",
+                )
+            ],
+        ),
+        (
+            plt.subplot(gs[num_files + 1, 1]),
+            [
+                (
+                    "Texture",
+                    [
+                        chars["zero_crossing_rate"] * 1000
+                        for chars in all_chars.values()
+                    ],
+                    "ZCR×1000",
+                )
+            ],
         ),
     ]
 
-    def plot_grouped_bars(ax, metrics, show_legend=True):
-        n_groups = len(metrics)
+    # Plot each metric
+    for i, (ax, metric_data) in enumerate(metrics):
         n_voices = len(audio_files)
         bar_width = 0.25
+        indices = np.array([0])
 
-        indices = np.arange(n_groups)
+        values = metric_data[0][1]
+        max_val = max(values)
 
-        # Get max value for y-axis scaling
-        max_val = max(max(m[1]) for m in metrics)
-
-        for i, (voice, color) in enumerate(zip(audio_files.keys(), colors)):
-            values = [m[1][i] for m in metrics]
-            offset = (i - n_voices / 2 + 0.5) * bar_width
+        for j, (voice, color) in enumerate(zip(audio_files.keys(), colors)):
+            offset = (j - n_voices / 2 + 0.5) * bar_width
             bars = ax.bar(
-                indices + offset, values, bar_width, label=voice, color=color, alpha=0.8
+                indices + offset,
+                [values[j]],
+                bar_width,
+                label=voice,
+                color=color,
+                alpha=0.8,
             )
 
             # Add value labels on top of bars
@@ -274,12 +302,12 @@ def plot_analysis(audio_files: Dict[str, str], output_dir: str):
                 )
 
         ax.set_xticks(indices)
-        ax.set_xticklabels([f"{m[0]}\n({m[2]})" for m in metrics])
-
-        # Set y-axis limits with some padding
+        ax.set_xticklabels([f"{metric_data[0][0]}\n({metric_data[0][2]})"])
         ax.set_ylim(0, max_val * 1.2)
+        ax.set_ylabel("Value")
 
-        if show_legend:
+        # Only show legend on first metric plot
+        if i == 0:
             ax.legend(
                 bbox_to_anchor=(1.05, 1),
                 loc="upper left",
@@ -287,22 +315,11 @@ def plot_analysis(audio_files: Dict[str, str], output_dir: str):
                 edgecolor="#ffffff",
             )
 
-    # Plot both subplots
-    plot_grouped_bars(ax1, metrics1, show_legend=True)
-    plot_grouped_bars(ax2, metrics2, show_legend=False)
+        # Style the subplot
+        setup_plot(fig, ax, metric_data[0][0])
 
-    # Style both subplots
-    setup_plot(fig, ax1, "Brightness and Volume")
-    setup_plot(fig, ax2, "Voice Pitch and Texture")
-
-    # Add y-axis labels
-    ax1.set_ylabel("Value")
-    ax2.set_ylabel("Value")
-
-    # Adjust the figure size to accommodate the legend
-    fig.set_size_inches(15, 15)
-
-    # Add padding around the entire figure
+    # Adjust the figure size and padding
+    fig.set_size_inches(15, 20)
     plt.subplots_adjust(right=0.85, top=0.95, bottom=0.05, left=0.1)
     plt.savefig(os.path.join(output_dir, "analysis_comparison.png"), dpi=300)
     print(f"Saved analysis comparison to {output_dir}/analysis_comparison.png")
@@ -332,7 +349,7 @@ def main():
     )
     parser.add_argument("--url", default="http://localhost:8880", help="API base URL")
     parser.add_argument(
-        "--output-dir", 
+        "--output-dir",
         default="examples/assorted_checks/test_combinations/output",
         help="Output directory for audio files",
     )
diff --git a/examples/assorted_checks/test_formats/test_audio_formats.py b/examples/assorted_checks/test_formats/test_audio_formats.py
index e126dec..68156b6 100644
--- a/examples/assorted_checks/test_formats/test_audio_formats.py
+++ b/examples/assorted_checks/test_formats/test_audio_formats.py
@@ -66,26 +66,27 @@ def plot_format_comparison(stats: list, output_dir: str):
     for i, stat in enumerate(stats):
         format_name = stat["format"].upper()
         try:
-            # Handle PCM format differently
-            if stat["format"] == "pcm":
-                # Read raw PCM data (16-bit mono)
-                with open(
-                    os.path.join(output_dir, f"test_audio.{stat['format']}"), "rb"
-                ) as f:
-                    raw_data = f.read()
-                data = np.frombuffer(raw_data, dtype=np.int16)
-                data = data.astype(np.float32) / 32768.0  # Convert to float [-1, 1]
-                sr = 24000
-            else:
-                # Read other formats with soundfile
-                data, sr = sf.read(
-                    os.path.join(output_dir, f"test_audio.{stat['format']}")
-                )
+            file_path = os.path.join(output_dir, f"test_audio.{stat['format']}")
 
-            # Plot waveform
+            if stat["format"] == "wav":
+                # Use scipy.io.wavfile for WAV files
+                sr, data = wavfile.read(file_path)
+                data = data.astype(np.float32) / 32768.0  # Convert to float [-1, 1]
+            elif stat["format"] == "pcm":
+                # Read raw 16-bit signed little-endian PCM data at 24kHz
+                data = np.frombuffer(
+                    open(file_path, "rb").read(), dtype="<i2"
+                )  # '<i2' means little-endian 16-bit signed int
+                data = data.astype(np.float32) / 32768.0  # Convert to float [-1, 1]
+                sr = 24000  # Known sample rate for our endpoint
+            else:
+                # Use soundfile for other formats (mp3, opus, flac)
+                data, sr = sf.read(file_path)
+
+            # Plot waveform with consistent normalization
             ax = plt.subplot(gs_waves[i])
             time = np.arange(len(data)) / sr
-            plt.plot(time, data / np.max(np.abs(data)), linewidth=0.5, color="#ff2a6d")
+            plt.plot(time, data, linewidth=0.5, color="#ff2a6d")
             ax.set_xlabel("Time (seconds)")
             ax.set_ylabel("")
             ax.set_ylim(-1.1, 1.1)
@@ -200,41 +201,42 @@ def get_audio_stats(file_path: str) -> dict:
     """Get audio file statistics"""
     file_size = os.path.getsize(file_path)
     file_size_kb = file_size / 1024  # Convert to KB
+    format_name = Path(file_path).suffix[1:]
 
-    try:
-        # Try reading with soundfile first
+    if format_name == "wav":
+        # Use scipy.io.wavfile for WAV files
+        sample_rate, data = wavfile.read(file_path)
+        data = data.astype(np.float32) / 32768.0  # Convert to float [-1, 1]
+        duration = len(data) / sample_rate
+        channels = 1 if len(data.shape) == 1 else data.shape[1]
+    elif format_name == "pcm":
+        # For PCM, read raw 16-bit signed little-endian PCM data at 24kHz
+        data = np.frombuffer(
+            open(file_path, "rb").read(), dtype="<i2"
+        )  # '<i2' means little-endian 16-bit signed int
+        data = data.astype(np.float32) / 32768.0  # Normalize to [-1, 1]
+        sample_rate = 24000  # Known sample rate for our endpoint
+        duration = len(data) / sample_rate
+        channels = 1
+    else:
+        # Use soundfile for other formats (mp3, opus, flac)
         data, sample_rate = sf.read(file_path)
         duration = len(data) / sample_rate
         channels = 1 if len(data.shape) == 1 else data.shape[1]
 
-        # Calculate audio statistics
-        stats = {
-            "format": Path(file_path).suffix[1:],
-            "file_size_kb": round(file_size_kb, 2),
-            "duration_seconds": round(duration, 2),
-            "sample_rate": sample_rate,
-            "channels": channels,
-            "min_amplitude": float(np.min(data)),
-            "max_amplitude": float(np.max(data)),
-            "mean_amplitude": float(np.mean(np.abs(data))),
-            "rms_amplitude": float(np.sqrt(np.mean(np.square(data)))),
-        }
-        return stats
-    except:
-        # For PCM, read raw bytes and estimate duration
-        with open(file_path, "rb") as f:
-            data = f.read()
-            # Assuming 16-bit PCM mono at 24kHz
-            samples = len(data) // 2  # 2 bytes per sample
-            duration = samples / 24000
-            return {
-                "format": "pcm",
-                "file_size_kb": round(file_size_kb, 2),
-                "duration_seconds": round(duration, 2),
-                "sample_rate": 24000,
-                "channels": 1,
-                "note": "PCM stats are estimated from raw bytes",
-            }
+    # Calculate audio statistics
+    stats = {
+        "format": format_name,
+        "file_size_kb": round(file_size_kb, 2),
+        "duration_seconds": round(duration, 2),
+        "sample_rate": sample_rate,
+        "channels": channels,
+        "min_amplitude": float(np.min(data)),
+        "max_amplitude": float(np.max(data)),
+        "mean_amplitude": float(np.mean(np.abs(data))),
+        "rms_amplitude": float(np.sqrt(np.mean(np.square(data)))),
+    }
+    return stats
 
 
 def main():
@@ -254,13 +256,49 @@ def main():
 
         # Generate and save
         start_time = time.time()
-        response = client.audio.speech.create(
-            model="kokoro", voice=voice, input=SAMPLE_TEXT, response_format=fmt
+
+        # Use requests with stream=False for consistent data handling
+        response = requests.post(
+            "http://localhost:8880/v1/audio/speech",
+            json={
+                "model": "kokoro",
+                "voice": voice,
+                "input": SAMPLE_TEXT,
+                "response_format": fmt,
+                "stream": False,  # Explicitly disable streaming to get single complete chunk
+            },
+            stream=False,
+            headers={"Accept": f"audio/{fmt}"},  # Explicitly request audio format
         )
         generation_time = time.time() - start_time
 
-        with open(output_path, "wb") as f:
-            f.write(response.content)
+        print(f"\nResponse headers for {fmt}:")
+        for header, value in response.headers.items():
+            print(f"{header}: {value}")
+        print(f"Content length: {len(response.content)} bytes")
+        print(f"First few bytes: {response.content[:20].hex()}")
+
+        # Write the file and verify it was written correctly
+        try:
+            with open(output_path, "wb") as f:
+                f.write(response.content)
+
+            # Verify file was written
+            if not output_path.exists():
+                raise Exception(f"Failed to write {fmt} file")
+
+            # Check file size matches content length
+            written_size = output_path.stat().st_size
+            if written_size != len(response.content):
+                raise Exception(
+                    f"File size mismatch: expected {len(response.content)} bytes, got {written_size}"
+                )
+
+            print(f"Successfully wrote {fmt} file")
+
+        except Exception as e:
+            print(f"Error writing {fmt} file: {e}")
+            continue
 
         # Get stats
         file_stats = get_audio_stats(str(output_path))
diff --git a/examples/assorted_checks/test_normalizer.py b/examples/assorted_checks/test_normalizer.py
index 2f4bab7..82a4223 100644
--- a/examples/assorted_checks/test_normalizer.py
+++ b/examples/assorted_checks/test_normalizer.py
@@ -4,15 +4,19 @@ import random
 import string
 from typing import List, Tuple
 
+
 def create_test_cases() -> List[str]:
     """Create a variety of test cases with different characteristics"""
-    
+
     # Helper to create random text with specific patterns
     def random_text(length: int) -> str:
-        return ''.join(random.choice(string.ascii_letters + string.digits + " .,!?") for _ in range(length))
-    
+        return "".join(
+            random.choice(string.ascii_letters + string.digits + " .,!?")
+            for _ in range(length)
+        )
+
     test_cases = []
-    
+
     # Base test cases that hit specific patterns
     base_cases = [
         "Dr. Smith and Mr. Jones discussed the $1,234.56 million investment.",
@@ -21,10 +25,10 @@ def create_test_cases() -> List[str]:
         "X's and Y's properties cost £50 million in the 1990s",
         "こんにちは。今日は！",
     ]
-    
+
     # Add base cases
     test_cases.extend(base_cases)
-    
+
     # Add variations with random content
     for length in [100, 1000, 10000]:
         # Create 3 variations of each length
@@ -35,23 +39,24 @@ def create_test_cases() -> List[str]:
             text = text.replace(text[30:40], "$1,234.56")
             text = text.replace(text[50:60], "A.B.C. xyz")
             test_cases.append(text)
-    
+
     return test_cases
 
+
 class TextNormalizerInline:
     """Text normalizer using inline patterns"""
-    
+
     def normalize(self, text: str) -> str:
         # Replace quotes and brackets
         text = text.replace(chr(8216), "'").replace(chr(8217), "'")
         text = text.replace("«", chr(8220)).replace("»", chr(8221))
         text = text.replace(chr(8220), '"').replace(chr(8221), '"')
         text = text.replace("(", "«").replace(")", "»")
-        
+
         # Handle CJK punctuation
         for a, b in zip("、。！，：；？", ",.!,:;?"):
             text = text.replace(a, b + " ")
-        
+
         text = re.sub(r"[^\S \n]", " ", text)
         text = re.sub(r"  +", " ", text)
         text = re.sub(r"(?<=\n) +(?=\n)", "", text)
@@ -61,108 +66,132 @@ class TextNormalizerInline:
         text = re.sub(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))", "Mrs", text)
         text = re.sub(r"\betc\.(?! [A-Z])", "etc", text)
         text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text)
-        text = re.sub(r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)", split_num, text)
+        text = re.sub(
+            r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)",
+            split_num,
+            text,
+        )
         text = re.sub(r"(?<=\d),(?=\d)", "", text)
-        text = re.sub(r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b", handle_money, text)
+        text = re.sub(
+            r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b",
+            handle_money,
+            text,
+        )
         text = re.sub(r"\d*\.\d+", handle_decimal, text)
         text = re.sub(r"(?<=\d)-(?=\d)", " to ", text)
         text = re.sub(r"(?<=\d)S", " S", text)
         text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
         text = re.sub(r"(?<=X')S\b", "s", text)
-        text = re.sub(r"(?:[A-Za-z]\.){2,} [a-z]", lambda m: m.group().replace(".", "-"), text)
+        text = re.sub(
+            r"(?:[A-Za-z]\.){2,} [a-z]", lambda m: m.group().replace(".", "-"), text
+        )
         text = re.sub(r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)
-        
+
         return text.strip()
 
+
 class TextNormalizerCompiled:
     """Text normalizer using all compiled patterns"""
-    
+
     def __init__(self):
         self.patterns = {
-            'whitespace': re.compile(r"[^\S \n]"),
-            'multi_space': re.compile(r"  +"),
-            'newline_space': re.compile(r"(?<=\n) +(?=\n)"),
-            'doctor': re.compile(r"\bD[Rr]\.(?= [A-Z])"),
-            'mister': re.compile(r"\b(?:Mr\.|MR\.(?= [A-Z]))"),
-            'miss': re.compile(r"\b(?:Ms\.|MS\.(?= [A-Z]))"),
-            'mrs': re.compile(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))"),
-            'etc': re.compile(r"\betc\.(?! [A-Z])"),
-            'yeah': re.compile(r"(?i)\b(y)eah?\b"),
-            'numbers': re.compile(r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)"),
-            'comma_in_number': re.compile(r"(?<=\d),(?=\d)"),
-            'money': re.compile(r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b"),
-            'decimal': re.compile(r"\d*\.\d+"),
-            'range': re.compile(r"(?<=\d)-(?=\d)"),
-            's_after_number': re.compile(r"(?<=\d)S"),
-            'possessive_s': re.compile(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b"),
-            'x_possessive': re.compile(r"(?<=X')S\b"),
-            'initials': re.compile(r"(?:[A-Za-z]\.){2,} [a-z]"),
-            'single_initial': re.compile(r"(?i)(?<=[A-Z])\.(?=[A-Z])")
+            "whitespace": re.compile(r"[^\S \n]"),
+            "multi_space": re.compile(r"  +"),
+            "newline_space": re.compile(r"(?<=\n) +(?=\n)"),
+            "doctor": re.compile(r"\bD[Rr]\.(?= [A-Z])"),
+            "mister": re.compile(r"\b(?:Mr\.|MR\.(?= [A-Z]))"),
+            "miss": re.compile(r"\b(?:Ms\.|MS\.(?= [A-Z]))"),
+            "mrs": re.compile(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))"),
+            "etc": re.compile(r"\betc\.(?! [A-Z])"),
+            "yeah": re.compile(r"(?i)\b(y)eah?\b"),
+            "numbers": re.compile(
+                r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)"
+            ),
+            "comma_in_number": re.compile(r"(?<=\d),(?=\d)"),
+            "money": re.compile(
+                r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b"
+            ),
+            "decimal": re.compile(r"\d*\.\d+"),
+            "range": re.compile(r"(?<=\d)-(?=\d)"),
+            "s_after_number": re.compile(r"(?<=\d)S"),
+            "possessive_s": re.compile(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b"),
+            "x_possessive": re.compile(r"(?<=X')S\b"),
+            "initials": re.compile(r"(?:[A-Za-z]\.){2,} [a-z]"),
+            "single_initial": re.compile(r"(?i)(?<=[A-Z])\.(?=[A-Z])"),
         }
-    
+
     def normalize(self, text: str) -> str:
         # Replace quotes and brackets
         text = text.replace(chr(8216), "'").replace(chr(8217), "'")
         text = text.replace("«", chr(8220)).replace("»", chr(8221))
         text = text.replace(chr(8220), '"').replace(chr(8221), '"')
         text = text.replace("(", "«").replace(")", "»")
-        
+
         # Handle CJK punctuation
         for a, b in zip("、。！，：；？", ",.!,:;?"):
             text = text.replace(a, b + " ")
-        
+
         # Use compiled patterns
-        text = self.patterns['whitespace'].sub(" ", text)
-        text = self.patterns['multi_space'].sub(" ", text)
-        text = self.patterns['newline_space'].sub("", text)
-        text = self.patterns['doctor'].sub("Doctor", text)
-        text = self.patterns['mister'].sub("Mister", text)
-        text = self.patterns['miss'].sub("Miss", text)
-        text = self.patterns['mrs'].sub("Mrs", text)
-        text = self.patterns['etc'].sub("etc", text)
-        text = self.patterns['yeah'].sub(r"\1e'a", text)
-        text = self.patterns['numbers'].sub(split_num, text)
-        text = self.patterns['comma_in_number'].sub("", text)
-        text = self.patterns['money'].sub(handle_money, text)
-        text = self.patterns['decimal'].sub(handle_decimal, text)
-        text = self.patterns['range'].sub(" to ", text)
-        text = self.patterns['s_after_number'].sub(" S", text)
-        text = self.patterns['possessive_s'].sub("'S", text)
-        text = self.patterns['x_possessive'].sub("s", text)
-        text = self.patterns['initials'].sub(lambda m: m.group().replace(".", "-"), text)
-        text = self.patterns['single_initial'].sub("-", text)
-        
+        text = self.patterns["whitespace"].sub(" ", text)
+        text = self.patterns["multi_space"].sub(" ", text)
+        text = self.patterns["newline_space"].sub("", text)
+        text = self.patterns["doctor"].sub("Doctor", text)
+        text = self.patterns["mister"].sub("Mister", text)
+        text = self.patterns["miss"].sub("Miss", text)
+        text = self.patterns["mrs"].sub("Mrs", text)
+        text = self.patterns["etc"].sub("etc", text)
+        text = self.patterns["yeah"].sub(r"\1e'a", text)
+        text = self.patterns["numbers"].sub(split_num, text)
+        text = self.patterns["comma_in_number"].sub("", text)
+        text = self.patterns["money"].sub(handle_money, text)
+        text = self.patterns["decimal"].sub(handle_decimal, text)
+        text = self.patterns["range"].sub(" to ", text)
+        text = self.patterns["s_after_number"].sub(" S", text)
+        text = self.patterns["possessive_s"].sub("'S", text)
+        text = self.patterns["x_possessive"].sub("s", text)
+        text = self.patterns["initials"].sub(
+            lambda m: m.group().replace(".", "-"), text
+        )
+        text = self.patterns["single_initial"].sub("-", text)
+
         return text.strip()
 
+
 class TextNormalizerHybrid:
     """Text normalizer using hybrid approach - compile only complex/frequent patterns"""
-    
+
     def __init__(self):
         # Only compile patterns that are complex or frequently used
         self.patterns = {
-            'whitespace': re.compile(r"[^\S \n]"),
-            'numbers': re.compile(r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)"),
-            'money': re.compile(r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b"),
-            'initials': re.compile(r"(?:[A-Za-z]\.){2,} [a-z]")
+            "whitespace": re.compile(r"[^\S \n]"),
+            "numbers": re.compile(
+                r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)"
+            ),
+            "money": re.compile(
+                r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b"
+            ),
+            "initials": re.compile(r"(?:[A-Za-z]\.){2,} [a-z]"),
         }
-    
+
     def normalize(self, text: str) -> str:
         # Replace quotes and brackets
         text = text.replace(chr(8216), "'").replace(chr(8217), "'")
         text = text.replace("«", chr(8220)).replace("»", chr(8221))
         text = text.replace(chr(8220), '"').replace(chr(8221), '"')
         text = text.replace("(", "«").replace(")", "»")
-        
+
         # Handle CJK punctuation
         for a, b in zip("、。！，：；？", ",.!,:;?"):
             text = text.replace(a, b + " ")
-        
+
         # Use compiled patterns for complex operations
-        text = self.patterns['whitespace'].sub(" ", text)
-        text = self.patterns['numbers'].sub(split_num, text)
-        text = self.patterns['money'].sub(handle_money, text)
-        text = self.patterns['initials'].sub(lambda m: m.group().replace(".", "-"), text)
-        
+        text = self.patterns["whitespace"].sub(" ", text)
+        text = self.patterns["numbers"].sub(split_num, text)
+        text = self.patterns["money"].sub(handle_money, text)
+        text = self.patterns["initials"].sub(
+            lambda m: m.group().replace(".", "-"), text
+        )
+
         # Use inline patterns for simpler operations
         text = re.sub(r"  +", " ", text)
         text = re.sub(r"(?<=\n) +(?=\n)", "", text)
@@ -179,9 +208,10 @@ class TextNormalizerHybrid:
         text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
         text = re.sub(r"(?<=X')S\b", "s", text)
         text = re.sub(r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)
-        
+
         return text.strip()
 
+
 def split_num(match: re.Match) -> str:
     """Split numbers for TTS processing"""
     num = match.group(0)
@@ -192,61 +222,70 @@ def split_num(match: re.Match) -> str:
         return f"{num[:-1]} s"
     return num
 
+
 def handle_money(match: re.Match) -> str:
     """Format money strings for TTS"""
     text = match.group(0)
     return text.replace("$", " dollars ").replace("£", " pounds ")
 
+
 def handle_decimal(match: re.Match) -> str:
     """Format decimal numbers for TTS"""
     num = match.group(0)
     return num.replace(".", " point ")
 
-def benchmark_normalizers(test_cases: List[str], iterations: int = 100) -> Tuple[float, float, float]:
+
+def benchmark_normalizers(
+    test_cases: List[str], iterations: int = 100
+) -> Tuple[float, float, float]:
     """Benchmark all three implementations"""
-    
+
     normalizers = {
-        'inline': TextNormalizerInline(),
-        'compiled': TextNormalizerCompiled(),
-        'hybrid': TextNormalizerHybrid()
+        "inline": TextNormalizerInline(),
+        "compiled": TextNormalizerCompiled(),
+        "hybrid": TextNormalizerHybrid(),
     }
-    
+
     results = {}
-    
+
     # Test each normalizer
     for name, normalizer in normalizers.items():
         start = time.perf_counter()
-        
+
         # Run normalizations
         for _ in range(iterations):
             for test in test_cases:
                 normalizer.normalize(test)
-        
+
         results[name] = time.perf_counter() - start
-    
+
     return results
 
+
 def verify_outputs(test_cases: List[str]) -> bool:
     """Verify that all implementations produce identical output"""
     normalizers = {
-        'inline': TextNormalizerInline(),
-        'compiled': TextNormalizerCompiled(),
-        'hybrid': TextNormalizerHybrid()
+        "inline": TextNormalizerInline(),
+        "compiled": TextNormalizerCompiled(),
+        "hybrid": TextNormalizerHybrid(),
     }
-    
+
     for test in test_cases:
         results = [norm.normalize(test) for norm in normalizers.values()]
         if not all(r == results[0] for r in results):
             return False
     return True
 
+
 def main():
     # Create test cases
     print("Generating test cases...")
     test_cases = create_test_cases()
     total_chars = sum(len(t) for t in test_cases)
-    print(f"Created {len(test_cases)} test cases, total size: {total_chars:,} characters")
-    
+    print(
+        f"Created {len(test_cases)} test cases, total size: {total_chars:,} characters"
+    )
+
     # Verify output consistency
     print("\nVerifying output consistency...")
     if verify_outputs(test_cases):
@@ -254,15 +293,16 @@ def main():
     else:
         print("✗ Warning: Implementations produce different outputs!")
         return
-    
+
     # Run benchmarks
     print("\nRunning benchmarks...")
     iterations = 100
     results = benchmark_normalizers(test_cases, iterations)
-    
+
     # Print results
     print(f"\nResults for {iterations} iterations: ")
     for name, time_taken in results.items():
         print(f"{name.capitalize()}: {time_taken:.3f}s")
 
-main()
\ No newline at end of file
+
+main()
diff --git a/examples/assorted_checks/validate_wav.py b/examples/assorted_checks/validate_wav.py
index 20122ef..844655a 100644
--- a/examples/assorted_checks/validate_wav.py
+++ b/examples/assorted_checks/validate_wav.py
@@ -1,8 +1,11 @@
+import argparse
+from typing import Any, Dict
+from pathlib import Path
+
 import numpy as np
 import soundfile as sf
-import argparse
-from pathlib import Path
-from typing import Dict, Any
+from tqdm import tqdm
+
 
 def validate_tts(wav_path: str) -> dict:
     """
@@ -13,34 +16,40 @@ def validate_tts(wav_path: str) -> dict:
         audio, sr = sf.read(wav_path)
         if len(audio.shape) > 1:
             audio = np.mean(audio, axis=1)
-        
+
         duration = len(audio) / sr
         issues = []
-        
+
         # Basic quality checks
         abs_audio = np.abs(audio)
         stats = {
-            'rms': float(np.sqrt(np.mean(audio**2))),
-            'peak': float(np.max(abs_audio)),
-            'dc_offset': float(np.mean(audio))
+            "rms": float(np.sqrt(np.mean(audio**2))),
+            "peak": float(np.max(abs_audio)),
+            "dc_offset": float(np.mean(audio)),
         }
-        
+
         clip_count = np.sum(abs_audio >= 0.99)
         clip_percent = (clip_count / len(audio)) * 100
-        
+
         if duration < 0.1:
-            issues.append("WARNING: Audio is suspiciously short - possible failed generation")
-            
-        if stats['peak'] >= 1.0:
+            issues.append(
+                "WARNING: Audio is suspiciously short - possible failed generation"
+            )
+
+        if stats["peak"] >= 1.0:
             if clip_percent > 1.0:
-                issues.append(f"WARNING: Significant clipping detected ({clip_percent:.2e}% of samples)")
+                issues.append(
+                    f"WARNING: Significant clipping detected ({clip_percent:.2e}% of samples)"
+                )
             elif clip_percent > 0.01:
-                issues.append(f"INFO: Minor peak limiting detected ({clip_percent:.2e}% of samples)")
-            
-        if stats['rms'] < 0.01:
+                issues.append(
+                    f"INFO: Minor peak limiting detected ({clip_percent:.2e}% of samples)"
+                )
+
+        if stats["rms"] < 0.01:
             issues.append("WARNING: Audio is very quiet - possible failed generation")
-            
-        if abs(stats['dc_offset']) > 0.1:
+
+        if abs(stats["dc_offset"]) > 0.1:
             issues.append(f"WARNING: High DC offset ({stats['dc_offset']:.3f})")
 
         # Check for long silence gaps
@@ -51,66 +60,79 @@ def validate_tts(wav_path: str) -> dict:
         window_size = int(min_silence * sr)
         silence_count = 0
         last_silence = -1
-        
+
         start_idx = int(0.2 * sr)  # Skip first 0.2s
-        for i in range(start_idx, len(db) - window_size, window_size):
-            window = db[i:i+window_size]
+        for i in tqdm(
+            range(start_idx, len(db) - window_size, window_size),
+            desc="Checking for silence",
+        ):
+            window = db[i : i + window_size]
             if np.mean(window) < silence_threshold:
                 silent_ratio = np.mean(window < silence_threshold)
                 if silent_ratio > 0.9:
-                    if last_silence == -1 or (i/sr - last_silence) > 2.0:
+                    if last_silence == -1 or (i / sr - last_silence) > 2.0:
                         silence_count += 1
-                        last_silence = i/sr
-                        issues.append(f"WARNING: Long silence detected at {i/sr:.2f}s (duration: {min_silence:.1f}s)")
-        
+                        last_silence = i / sr
+                        issues.append(
+                            f"WARNING: Long silence detected at {i/sr:.2f}s (duration: {min_silence:.1f}s)"
+                        )
+
         if silence_count > 2:
-            issues.append(f"WARNING: Multiple long silences found ({silence_count} total)")
+            issues.append(
+                f"WARNING: Multiple long silences found ({silence_count} total)"
+            )
 
         # Detect audio artifacts
         diff = np.diff(audio)
         abs_diff = np.abs(diff)
         window_size = min(int(0.005 * sr), 256)
-        window = np.ones(window_size)/window_size
-        local_avg_diff = np.convolve(abs_diff, window, mode='same')
-        
+        window = np.ones(window_size) / window_size
+        local_avg_diff = np.convolve(abs_diff, window, mode="same")
+
         spikes = (abs_diff > (10 * local_avg_diff)) & (abs_diff > 0.1)
         artifact_indices = np.nonzero(spikes)[0]
-        
+
         artifacts = []
         if len(artifact_indices) > 0:
             gaps = np.diff(artifact_indices)
             min_gap = int(0.005 * sr)
             break_points = np.nonzero(gaps > min_gap)[0] + 1
             groups = np.split(artifact_indices, break_points)
-            
+
             for group in groups:
                 if len(group) >= 5:
                     severity = np.max(abs_diff[group])
                     if severity > 0.2:
-                        center_idx = group[len(group)//2]
-                        artifacts.append({
-                            'time': float(center_idx/sr),  # Ensure float for consistent timing
-                            'severity': float(severity)
-                        })
+                        center_idx = group[len(group) // 2]
+                        artifacts.append(
+                            {
+                                "time": float(
+                                    center_idx / sr
+                                ),  # Ensure float for consistent timing
+                                "severity": float(severity),
+                            }
+                        )
                         issues.append(
                             f"WARNING: Audio discontinuity at {center_idx/sr:.3f}s "
                             f"(severity: {severity:.3f})"
                         )
 
         # Check for repeated speech segments
-        for chunk_duration in [5.0, 10.0]:
+        for chunk_duration in tqdm(
+            [0.5, 2.5, 5.0, 10.0], desc="Checking for repeated speech"
+        ):
             chunk_size = int(chunk_duration * sr)
             overlap = int(0.2 * chunk_size)
-            
-            for i in range(0, len(audio) - 2*chunk_size, overlap):
-                chunk1 = audio[i:i+chunk_size]
-                chunk2 = audio[i+chunk_size:i+2*chunk_size]
-                
+
+            for i in range(0, len(audio) - 2 * chunk_size, overlap):
+                chunk1 = audio[i : i + chunk_size]
+                chunk2 = audio[i + chunk_size : i + 2 * chunk_size]
+
                 if np.mean(np.abs(chunk1)) < 0.01 or np.mean(np.abs(chunk2)) < 0.01:
                     continue
-                    
+
                 try:
-                    correlation = np.corrcoef(chunk1, chunk2)[0,1]
+                    correlation = np.corrcoef(chunk1, chunk2)[0, 1]
                     if not np.isnan(correlation) and correlation > 0.92:
                         issues.append(
                             f"WARNING: Possible repeated speech at {i/sr:.1f}s "
@@ -128,92 +150,113 @@ def validate_tts(wav_path: str) -> dict:
             "rms_level": f"{stats['rms']:.3f}",
             "dc_offset": f"{stats['dc_offset']:.3f}",
             "artifact_count": len(artifacts),
-            "artifact_locations": [a['time'] for a in artifacts],
-            "artifact_severities": [a['severity'] for a in artifacts],
+            "artifact_locations": [a["time"] for a in artifacts],
+            "artifact_severities": [a["severity"] for a in artifacts],
             "issues": issues,
-            "valid": len(issues) == 0
-        }
-        
-    except Exception as e:
-        return {
-            "file": wav_path,
-            "error": str(e),
-            "valid": False
+            "valid": len(issues) == 0,
         }
 
-def generate_analysis_plots(wav_path: str, output_dir: str, validation_result: Dict[str, Any]):
+    except Exception as e:
+        return {"file": wav_path, "error": str(e), "valid": False}
+
+
+def generate_analysis_plots(
+    wav_path: str, output_dir: str, validation_result: Dict[str, Any]
+):
     """
     Generate analysis plots for audio file with time-aligned visualizations.
     """
     import matplotlib.pyplot as plt
     from scipy.signal import spectrogram
-    
+
     # Load audio
     audio, sr = sf.read(wav_path)
     if len(audio.shape) > 1:
         audio = np.mean(audio, axis=1)
-    
+
     # Create figure with shared x-axis
     fig = plt.figure(figsize=(15, 8))
     gs = plt.GridSpec(2, 1, height_ratios=[1.2, 0.8], hspace=0.1)
     ax1 = fig.add_subplot(gs[0])
     ax2 = fig.add_subplot(gs[1], sharex=ax1)
-    
+
     # Calculate spectrogram
     nperseg = 2048
     noverlap = 1536
-    f, t, Sxx = spectrogram(audio, sr, nperseg=nperseg, noverlap=noverlap, 
-                           window='hann', scaling='spectrum')
-    
+    f, t, Sxx = spectrogram(
+        audio, sr, nperseg=nperseg, noverlap=noverlap, window="hann", scaling="spectrum"
+    )
+
     # Plot spectrogram
-    im = ax1.pcolormesh(t, f, 10 * np.log10(Sxx + 1e-10), 
-                        shading='gouraud', cmap='viridis', 
-                        vmin=-100, vmax=-20)
-    ax1.set_ylabel('Frequency [Hz]', fontsize=10)
-    cbar = plt.colorbar(im, ax=ax1, label='dB')
-    ax1.set_title('Spectrogram', pad=10, fontsize=12)
-    
+    im = ax1.pcolormesh(
+        t,
+        f,
+        10 * np.log10(Sxx + 1e-10),
+        shading="gouraud",
+        cmap="viridis",
+        vmin=-100,
+        vmax=-20,
+    )
+    ax1.set_ylabel("Frequency [Hz]", fontsize=10)
+    cbar = plt.colorbar(im, ax=ax1, label="dB")
+    ax1.set_title("Spectrogram", pad=10, fontsize=12)
+
     # Plot waveform with exact time alignment
     times = np.arange(len(audio)) / sr
-    ax2.plot(times, audio, color='#2E5596', alpha=0.7, linewidth=0.5, label='Audio')
-    ax2.set_ylabel('Amplitude', fontsize=10)
-    ax2.set_xlabel('Time [sec]', fontsize=10)
+    ax2.plot(times, audio, color="#2E5596", alpha=0.7, linewidth=0.5, label="Audio")
+    ax2.set_ylabel("Amplitude", fontsize=10)
+    ax2.set_xlabel("Time [sec]", fontsize=10)
     ax2.grid(True, alpha=0.2)
-    
+
     # Add artifact markers
-    if 'artifact_locations' in validation_result and validation_result['artifact_locations']:
-        for loc in validation_result['artifact_locations']:
-            ax1.axvline(x=loc, color='red', alpha=0.7, linewidth=2)
-            ax2.axvline(x=loc, color='red', alpha=0.7, linewidth=2, label='Detected Artifacts')
-        
+    if (
+        "artifact_locations" in validation_result
+        and validation_result["artifact_locations"]
+    ):
+        for loc in validation_result["artifact_locations"]:
+            ax1.axvline(x=loc, color="red", alpha=0.7, linewidth=2)
+            ax2.axvline(
+                x=loc, color="red", alpha=0.7, linewidth=2, label="Detected Artifacts"
+            )
+
         # Add legend to both plots
-        if len(validation_result['artifact_locations']) > 0:
-            ax1.plot([], [], color='red', linewidth=2, label='Detected Artifacts')
-            ax1.legend(loc='upper right', fontsize=8)
+        if len(validation_result["artifact_locations"]) > 0:
+            ax1.plot([], [], color="red", linewidth=2, label="Detected Artifacts")
+            ax1.legend(loc="upper right", fontsize=8)
             # Only add unique labels to legend
             handles, labels = ax2.get_legend_handles_labels()
             unique_labels = dict(zip(labels, handles))
-            ax2.legend(unique_labels.values(), unique_labels.keys(), 
-                      loc='upper right', fontsize=8)
-    
+            ax2.legend(
+                unique_labels.values(),
+                unique_labels.keys(),
+                loc="upper right",
+                fontsize=8,
+            )
+
     # Set common x limits
-    xlim = (0, len(audio)/sr)
+    xlim = (0, len(audio) / sr)
     ax1.set_xlim(xlim)
     ax2.set_xlim(xlim)
     og_filename = Path(wav_path).name.split(".")[0]
     # Save plot
-    plt.savefig(Path(output_dir) / f"{og_filename}_audio_analysis.png", dpi=300, bbox_inches='tight')
+    plt.savefig(
+        Path(output_dir) / f"{og_filename}_audio_analysis.png",
+        dpi=300,
+        bbox_inches="tight",
+    )
     plt.close()
 
-if __name__ == "__main__":
-    wav_file = r"C:\Users\jerem\Desktop\Kokoro-FastAPI\examples\output.wav"
-    silent=False
 
+if __name__ == "__main__":
+    wav_file = r"C:\Users\jerem\Desktop\Kokoro-FastAPI\examples\assorted_checks\benchmarks\output_audio\chunk_600_tokens.wav"
+    silent = False
+
+    print(f"\n\n Processing:\n\t{wav_file}")
     result = validate_tts(wav_file)
     if not silent:
         wav_root_dir = Path(wav_file).parent
         generate_analysis_plots(wav_file, wav_root_dir, result)
-    
+
     print(f"\nValidating: {result['file']}")
     if "error" in result:
         print(f"Error: {result['error']}")
@@ -224,10 +267,10 @@ if __name__ == "__main__":
         print(f"RMS Level: {result['rms_level']}")
         print(f"DC Offset: {result['dc_offset']}")
         print(f"Detected Artifacts: {result['artifact_count']}")
-        
+
         if result["issues"]:
             print("\nIssues Found:")
             for issue in result["issues"]:
                 print(f"- {issue}")
         else:
-            print("\nNo issues found")
\ No newline at end of file
+            print("\nNo issues found")
diff --git a/examples/assorted_checks/validate_wavs.py b/examples/assorted_checks/validate_wavs.py
index a37c043..ebf114f 100644
--- a/examples/assorted_checks/validate_wavs.py
+++ b/examples/assorted_checks/validate_wavs.py
@@ -1,7 +1,9 @@
 import argparse
 from pathlib import Path
+
 from validate_wav import validate_tts
 
+
 def print_validation_result(result: dict, rel_path: Path):
     """Print full validation details for a single file."""
     print(f"\nValidating: {rel_path}")
@@ -13,7 +15,7 @@ def print_validation_result(result: dict, rel_path: Path):
         print(f"Peak Amplitude: {result['peak_amplitude']}")
         print(f"RMS Level: {result['rms_level']}")
         print(f"DC Offset: {result['dc_offset']}")
-        
+
         if result["issues"]:
             print("\nIssues Found:")
             for issue in result["issues"]:
@@ -21,25 +23,26 @@ def print_validation_result(result: dict, rel_path: Path):
         else:
             print("\nNo issues found")
 
+
 def validate_directory(directory: str):
     """Validate all wav files in a directory with detailed output and summary."""
     dir_path = Path(directory)
-    
+
     # Find all wav files (including nested directories)
     wav_files = list(dir_path.rglob("*.wav"))
     wav_files.extend(dir_path.rglob("*.mp3"))  # Also check mp3s
     wav_files = sorted(wav_files)
-    
+
     if not wav_files:
         print(f"No .wav or .mp3 files found in {directory}")
         return
-        
+
     print(f"Found {len(wav_files)} files in {directory}")
     print("=" * 80)
-    
+
     # Store results for summary
     results = []
-    
+
     # Detailed validation output
     for wav_file in wav_files:
         result = validate_tts(str(wav_file))
@@ -47,7 +50,7 @@ def validate_directory(directory: str):
         print_validation_result(result, rel_path)
         results.append((rel_path, result))
         print("=" * 80)
-    
+
     # Summary with detailed issues
     print("\nSUMMARY:")
     for rel_path, result in results:
@@ -58,15 +61,18 @@ def validate_directory(directory: str):
             issues = result["issues"]
             first_issue = issues[0].replace("WARNING: ", "")
             if len(issues) > 1:
-                print(f"{rel_path}: FAIL - {first_issue} (+{len(issues)-1} more issues)")
+                print(
+                    f"{rel_path}: FAIL - {first_issue} (+{len(issues)-1} more issues)"
+                )
             else:
                 print(f"{rel_path}: FAIL - {first_issue}")
         else:
             print(f"{rel_path}: PASS")
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Batch validate TTS wav files")
     parser.add_argument("directory", help="Directory containing wav files to validate")
     args = parser.parse_args()
-    
+
     validate_directory(args.directory)
diff --git a/examples/output.wav b/examples/output.wav
deleted file mode 100644
index 7915e5a..0000000
Binary files a/examples/output.wav and /dev/null differ
diff --git a/examples/output_audio_analysis.png b/examples/output_audio_analysis.png
deleted file mode 100644
index 8d0541d..0000000
Binary files a/examples/output_audio_analysis.png and /dev/null differ
diff --git a/examples/speech.mp3 b/examples/speech.mp3
deleted file mode 100644
index c0dc9b0..0000000
Binary files a/examples/speech.mp3 and /dev/null differ
diff --git a/requirements.txt b/requirements.txt
index 284620c..365e005 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ numpy==2.2.1
 scipy==1.14.1
 
 # Audio processing
-soundfile==0.12.1
+soundfile==0.13.0
 
 # Text processing
 phonemizer==3.3.0