diff --git a/.coveragerc b/.coveragerc
index dab8655..c66579a 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -6,6 +6,7 @@ omit =
     Kokoro-82M/*
     MagicMock/*
     test_*.py
+    examples/*
 
 [report]
 exclude_lines =
diff --git a/.gitignore b/.gitignore
index aebbfa7..f61cc2d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 
-output/
+output/*
+output_audio/*
 ui/data/*
 
 *.db
@@ -16,3 +17,10 @@ env/
 
 .coverage
 
+examples/assorted_checks/benchmarks/output_audio/*
+examples/assorted_checks/test_combinations/output/*
+examples/assorted_checks/test_openai/output/*
+
+examples/assorted_checks/test_voices/output/*
+examples/assorted_checks/test_formats/output/*
+ui/RepoScreenshot.png
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4194878..6303af1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,20 @@
 
 Notable changes to this project will be documented in this file.
 
+## 2025-01-04
+### Added
+- ONNX Support:
+  - Added single batch ONNX support for CPU inference
+  - Roughly 0.4 RTF (2.4x real-time speed)
+
+### Modified
+- Code Refactoring:
+  - Work on modularizing phonemizer and tokenizer into separate services
+  - Incorporated these services into a dev endpoint
+- Testing and Benchmarking:
+  - Cleaned up benchmarking scripts
+  - Cleaned up test scripts
+  - Added auto-WAV validation scripts
 
 ## 2025-01-02
 - Audio Format Support:
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
index 959e555..e9f2d3b 100644
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@@ -10,8 +10,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
-# Install PyTorch CPU version
-RUN pip3 install --no-cache-dir torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu
+# Install PyTorch CPU version and ONNX runtime
+RUN pip3 install --no-cache-dir torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu && \
+    pip3 install --no-cache-dir onnxruntime==1.20.1
 
 # Install all other dependencies from requirements.txt
 COPY requirements.txt .
diff --git a/README.md b/README.md
index 9732bd6..2e38852 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,8 @@
 </p>
 
 # Kokoro TTS API
-[![Tests](https://img.shields.io/badge/tests-89%20passed-darkgreen)]()
-[![Coverage](https://img.shields.io/badge/coverage-80%25-darkgreen)]()
+[![Tests](https://img.shields.io/badge/tests-95%20passed-darkgreen)]()
+[![Coverage](https://img.shields.io/badge/coverage-72%25-darkgreen)]()
 [![Tested at Model Commit](https://img.shields.io/badge/last--tested--model--commit-a67f113-blue)](https://huggingface.co/hexgrad/Kokoro-82M/tree/c3b0d86e2a980e027ef71c28819ea02e351c2667) [![Try on Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Try%20on-Spaces-blue)](https://huggingface.co/spaces/Remsky/Kokoro-TTS-Zero)
 
 Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model
@@ -187,15 +187,13 @@ Key Performance Metrics:
 <summary>GPU Vs. CPU</summary>
 
 ```bash
-# GPU: Requires NVIDIA GPU with CUDA 12.1 support
+# GPU: Requires NVIDIA GPU with CUDA 12.1 support (~35x realtime speed)
 docker compose up --build
 
-# CPU: ~10x slower than GPU inference
+# CPU: ONNX optimized inference (~2.4x realtime speed)
 docker compose -f docker-compose.cpu.yml up --build
 ```
 
-*Note: CPU Inference is currently a very basic implementation, and not heavily tested*
-
 </details>
 
 <details>
diff --git a/api/src/core/config.py b/api/src/core/config.py
index 52aea19..5348730 100644
--- a/api/src/core/config.py
+++ b/api/src/core/config.py
@@ -14,9 +14,18 @@ class Settings(BaseSettings):
     output_dir_size_limit_mb: float = 500.0  # Maximum size of output directory in MB
     default_voice: str = "af"
     model_dir: str = "/app/Kokoro-82M"  # Base directory for model files
-    model_path: str = "kokoro-v0_19.pth"
+    pytorch_model_path: str = "kokoro-v0_19.pth"
+    onnx_model_path: str = "kokoro-v0_19.onnx"
     voices_dir: str = "voices"
     sample_rate: int = 24000
+    
+    # ONNX Optimization Settings
+    onnx_num_threads: int = 4  # Number of threads for intra-op parallelism
+    onnx_inter_op_threads: int = 4  # Number of threads for inter-op parallelism
+    onnx_execution_mode: str = "parallel"  # parallel or sequential
+    onnx_optimization_level: str = "all"  # all, basic, or disabled
+    onnx_memory_pattern: bool = True  # Enable memory pattern optimization
+    onnx_arena_extend_strategy: str = "kNextPowerOfTwo"  # Memory allocation strategy
 
     class Config:
         env_file = ".env"
diff --git a/api/src/core/kokoro.py b/api/src/core/kokoro.py
new file mode 100644
index 0000000..e3d6c39
--- /dev/null
+++ b/api/src/core/kokoro.py
@@ -0,0 +1,185 @@
+import re
+
+import torch
+import phonemizer
+
+
+def split_num(num):
+    """Spell a matched clock time or 4-digit year as separate parts; decimals pass through."""
+    token = num.group()
+    if "." in token:
+        return token
+    if ":" in token:
+        hours, minutes = (int(part) for part in token.split(":"))
+        if minutes == 0:
+            return f"{hours} o'clock"
+        if minutes < 10:
+            return f"{hours} oh {minutes}"
+        return f"{hours} {minutes}"
+    year = int(token[:4])
+    if year < 1100 or year % 1000 < 10:
+        return token  # e.g. 1066 or 2005 is returned unchanged
+    century, tail = token[:2], int(token[2:4])
+    plural = "s" if token.endswith("s") else ""
+    if 100 <= year % 1000 <= 999 and tail == 0:
+        return f"{century} hundred{plural}"
+    if 100 <= year % 1000 <= 999 and tail < 10:
+        return f"{century} oh {tail}{plural}"
+    return f"{century} {tail}{plural}"
+
+
+def flip_money(m):
+    """Rewrite a matched $/£ amount as words, e.g. "$2.50" -> "2 dollars and 50 cents"."""
+    amount = m.group()
+    is_dollar = amount[0] == "$"
+    bill, digits = ("dollar" if is_dollar else "pound"), amount[1:]
+    if amount[-1].isalpha():  # trailing magnitude word such as "million"
+        return f"{digits} {bill}s"
+    if "." not in amount:
+        return f"{digits} {bill}{'' if digits == '1' else 's'}"
+    whole, frac = digits.split(".")
+    plural = "" if whole == "1" else "s"
+    minor = int(frac.ljust(2, "0"))  # a single digit counts tens: ".5" is 50
+    if is_dollar:
+        coins = "cent" if minor == 1 else "cents"
+        return f"{whole} {bill}{plural} and {minor} {coins}"
+    coins = "penny" if minor == 1 else "pence"
+    return f"{whole} {bill}{plural} and {minor} {coins}"
+
+
+def point_num(num):
+    whole, digits = num.group().split(".")  # "3.14" -> "3 point 1 4"
+    return f"{whole} point {' '.join(digits)}"
+
+
+def normalize_text(text):  # Rewrite quotes, titles, numbers and money in a fixed order; returns stripped text.
+    text = text.replace(chr(8216), "'").replace(chr(8217), "'")  # curly single quotes -> '
+    text = text.replace("«", chr(8220)).replace("»", chr(8221))  # guillemets -> curly double quotes
+    text = text.replace(chr(8220), '"').replace(chr(8221), '"')  # curly double quotes -> "
+    text = text.replace("(", "«").replace(")", "»")  # parentheses are re-encoded as guillemets
+    for a, b in zip("、。！，：；？", ",.!,:;?"):  # CJK punctuation -> ASCII mark plus a space
+        text = text.replace(a, b + " ")
+    text = re.sub(r"[^\S \n]", " ", text)  # whitespace other than space/newline -> space
+    text = re.sub(r"  +", " ", text)  # collapse runs of spaces
+    text = re.sub(r"(?<=\n) +(?=\n)", "", text)  # empty out lines that hold only spaces
+    text = re.sub(r"\bD[Rr]\.(?= [A-Z])", "Doctor", text)  # only when " <Capital>" follows
+    text = re.sub(r"\b(?:Mr\.|MR\.(?= [A-Z]))", "Mister", text)
+    text = re.sub(r"\b(?:Ms\.|MS\.(?= [A-Z]))", "Miss", text)
+    text = re.sub(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))", "Mrs", text)
+    text = re.sub(r"\betc\.(?! [A-Z])", "etc", text)  # drop the dot unless " <Capital>" follows
+    text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text)  # "yeah"/"yea" -> "ye'a", keeping the y's case
+    text = re.sub(  # split_num leaves decimals alone and splits 4-digit years and h:mm times
+        r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)", split_num, text
+    )
+    text = re.sub(r"(?<=\d),(?=\d)", "", text)  # remove commas between digits
+    text = re.sub(  # currency amounts -> words via flip_money
+        r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b",
+        flip_money,
+        text,
+    )
+    text = re.sub(r"\d*\.\d+", point_num, text)  # remaining decimals -> "a point b c"
+    text = re.sub(r"(?<=\d)-(?=\d)", " to ", text)  # hyphen between digits is read as "to"
+    text = re.sub(r"(?<=\d)S", " S", text)  # separate a capital S from the digit before it
+    text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)  # s right after one of these capitals -> 'S
+    text = re.sub(r"(?<=X')S\b", "s", text)  # ...but after X' it goes back to lowercase s
+    text = re.sub(  # dotted initialisms followed by a lowercase word: dots -> hyphens
+        r"(?:[A-Za-z]\.){2,} [a-z]", lambda m: m.group().replace(".", "-"), text
+    )
+    text = re.sub(r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)  # dot between two letters -> hyphen
+    return text.strip()
+
+
+def get_vocab():
+    """Map each symbol (pad, punctuation, ASCII letters, IPA) to its integer id."""
+    pad = "$"
+    punctuation = ';:,.!?¡¿—…"«»“” '
+    letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+    letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
+    # Ids are positions in this concatenation; a repeated symbol keeps its last position.
+    symbols = [pad, *punctuation, *letters, *letters_ipa]
+    vocab = {symbol: index for index, symbol in enumerate(symbols)}
+    return vocab
+
+
+VOCAB = get_vocab()  # symbol -> token id table, built once at import time
+
+
+def tokenize(ps):
+    return [VOCAB[p] for p in ps if p in VOCAB]  # symbols missing from VOCAB are dropped
+
+
+# One espeak backend per language key, constructed once at import time:
+# "a" -> en-us, "b" -> en-gb.
+phonemizers = {
+    lang: phonemizer.backend.EspeakBackend(
+        language=espeak_lang, preserve_punctuation=True, with_stress=True
+    )
+    for lang, espeak_lang in (("a", "en-us"), ("b", "en-gb"))
+}
+
+
+def phonemize(text, lang, norm=True):
+    """Return the espeak phoneme string for text, filtered down to symbols in VOCAB."""
+    source = normalize_text(text) if norm else text
+    results = phonemizers[lang].phonemize([source])
+    ps = results[0] if results else ""
+    # https://en.wiktionary.org/wiki/kokoro#English
+    for old, new in (("kəkˈoːɹoʊ", "kˈoʊkəɹoʊ"), ("kəkˈɔːɹəʊ", "kˈəʊkəɹəʊ"),
+                     ("ʲ", "j"), ("r", "ɹ"), ("x", "k"), ("ɬ", "l")):
+        ps = ps.replace(old, new)
+    ps = re.sub(r"(?<=[a-zɹː])(?=hˈʌndɹɪd)", " ", ps)
+    ps = re.sub(r' z(?=[;:,.!?¡¿—…"«»“” ]|$)', "z", ps)
+    if lang == "a":
+        ps = re.sub(r"(?<=nˈaɪn)ti(?!ː)", "di", ps)
+    return "".join(p for p in ps if p in VOCAB).strip()
+
+
+def length_to_mask(lengths):
+    """Return a bool mask of shape (batch, max_len) that is True past each row's length."""
+    batch = lengths.shape[0]
+    longest = lengths.max()
+    positions = torch.arange(longest).unsqueeze(0).expand(batch, -1)
+    positions = positions.type_as(lengths)
+    # 0-based position p is masked when p + 1 exceeds that row's length.
+    mask = torch.gt(positions + 1, lengths.unsqueeze(1))
+    return mask
+
+
+@torch.no_grad()
+def forward(model, tokens, ref_s, speed):  # Run one token sequence through the model; returns the decoder output as a numpy array.
+    device = ref_s.device
+    tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)  # batch of 1, wrapped in id 0 at both ends
+    input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
+    text_mask = length_to_mask(input_lengths).to(device)  # True past the row length (nothing for a single row)
+    bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
+    d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
+    s = ref_s[:, 128:]  # columns 128+ of ref_s feed the predictor (presumably the prosody half; TODO confirm)
+    d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
+    x, _ = model.predictor.lstm(d)
+    duration = model.predictor.duration_proj(x)
+    duration = torch.sigmoid(duration).sum(axis=-1) / speed  # larger speed -> smaller durations
+    pred_dur = torch.round(duration).clamp(min=1).long()  # every token gets at least 1
+    pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item())  # one row per token, one column per unit of duration
+    c_frame = 0
+    for i in range(pred_aln_trg.size(0)):  # token i is marked on the next pred_dur[0, i] consecutive columns
+        pred_aln_trg[i, c_frame : c_frame + pred_dur[0, i].item()] = 1
+        c_frame += pred_dur[0, i].item()
+    en = d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device)  # spread per-token features over the columns
+    F0_pred, N_pred = model.predictor.F0Ntrain(en, s)
+    t_en = model.text_encoder(tokens, input_lengths, text_mask)
+    asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
+    return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()  # first 128 columns of ref_s go to the decoder
+
+
+def generate(model, text, voicepack, lang="a", speed=1):
+    """Synthesize text; returns (audio, phonemes), or None when nothing tokenizes."""
+    tokens = tokenize(phonemize(text, lang))
+    if not tokens:
+        return None
+    if len(tokens) > 510:
+        tokens = tokens[:510]
+        print("Truncated to 510 tokens")
+    ref_s = voicepack[len(tokens)]  # the voicepack is indexed by token count
+    audio = forward(model, tokens, ref_s, speed)
+    symbol_of = {index: symbol for symbol, index in VOCAB.items()}
+    return audio, "".join(symbol_of[i] for i in tokens)
diff --git a/api/src/main.py b/api/src/main.py
index ebe2f53..c2a567e 100644
--- a/api/src/main.py
+++ b/api/src/main.py
@@ -10,8 +10,10 @@ from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
 from .core.config import settings
-from .services.tts import TTSModel, TTSService
+from .services.tts_model import TTSModel
+from .services.tts_service import TTSService
 from .routers.openai_compatible import router as openai_router
+from .routers.text_processing import router as text_router
 
 
 @asynccontextmanager
@@ -20,8 +22,8 @@ async def lifespan(app: FastAPI):
     logger.info("Loading TTS model and voice packs...")
 
     # Initialize the main model with warm-up
-    model, voicepack_count = TTSModel.initialize()
-    logger.info(f"Model loaded and warmed up on {TTSModel._device}")
+    voicepack_count = TTSModel.setup()
+    logger.info(f"Model loaded and warmed up on {TTSModel.get_device()}")
     logger.info(f"{voicepack_count} voice packs loaded successfully")
     yield
 
@@ -44,8 +46,9 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# Include OpenAI compatible router
+# Include routers
 app.include_router(openai_router, prefix="/v1")
+app.include_router(text_router)
 
 
 # Health check endpoint
diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py
index 4183d39..6663d7b 100644
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@@ -3,7 +3,7 @@ from typing import List
 from loguru import logger
 from fastapi import Depends, Response, APIRouter, HTTPException
 
-from ..services.tts import TTSService
+from ..services.tts_service import TTSService
 from ..services.audio import AudioService
 from ..structures.schemas import OpenAISpeechRequest
 
@@ -15,9 +15,7 @@ router = APIRouter(
 
 def get_tts_service() -> TTSService:
     """Dependency to get TTSService instance with database session"""
-    return TTSService(
-        start_worker=False
-    )  # Don't start worker thread for OpenAI endpoint
+    return TTSService()  # Initialize TTSService with default settings
 
 
 @router.post("/audio/speech")
diff --git a/api/src/routers/text_processing.py b/api/src/routers/text_processing.py
new file mode 100644
index 0000000..9e1ce3a
--- /dev/null
+++ b/api/src/routers/text_processing.py
@@ -0,0 +1,30 @@
+from fastapi import APIRouter
+from ..structures.text_schemas import PhonemeRequest, PhonemeResponse
+from ..services.text_processing import phonemize, tokenize
+
+router = APIRouter(  # routes declared on this router are served under the /text prefix
+    prefix="/text",
+    tags=["text processing"]
+)
+
+@router.post("/phonemize", response_model=PhonemeResponse)
+async def phonemize_text(request: PhonemeRequest) -> PhonemeResponse:
+    """Convert text to phonemes and padded token IDs: Rough attempt
+
+    Args:
+        request: Request containing text and language
+
+    Returns:
+        Phonemes and token IDs, with token 0 added at both ends
+
+    Raises:
+        HTTPException: 400 when phonemization rejects the input with a
+            ValueError (e.g. an unsupported language code)
+    """
+    from fastapi import HTTPException  # local import: only this handler needs it
+
+    try:
+        phonemes = phonemize(request.text, request.language)
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e)) from e
+    return PhonemeResponse(phonemes=phonemes, tokens=[0, *tokenize(phonemes), 0])
diff --git a/api/src/services/__init__.py b/api/src/services/__init__.py
index 46f2e93..82cf76e 100644
--- a/api/src/services/__init__.py
+++ b/api/src/services/__init__.py
@@ -1,3 +1,3 @@
-from .tts import TTSModel, TTSService
+from .tts_service import TTSService
 
-__all__ = ["TTSService", "TTSModel"]
+__all__ = ["TTSService"]
diff --git a/api/src/services/text_processing/__init__.py b/api/src/services/text_processing/__init__.py
new file mode 100644
index 0000000..f945e18
--- /dev/null
+++ b/api/src/services/text_processing/__init__.py
@@ -0,0 +1,13 @@
+from .normalizer import normalize_text
+from .phonemizer import phonemize, PhonemizerBackend, EspeakBackend
+from .vocabulary import tokenize, decode_tokens, VOCAB
+
+__all__ = [
+    'normalize_text',
+    'phonemize',
+    'tokenize',
+    'decode_tokens',
+    'VOCAB',
+    'PhonemizerBackend',
+    'EspeakBackend'
+]
diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py
new file mode 100644
index 0000000..db5b7db
--- /dev/null
+++ b/api/src/services/text_processing/normalizer.py
@@ -0,0 +1,111 @@
+import re
+
+def split_num(num: re.Match) -> str:
+    """Turn a matched clock time or 4-digit year into speakable pieces"""
+    matched = num.group()
+    if "." in matched:  # decimals are returned untouched
+        return matched
+    if ":" in matched:
+        hour, minute = map(int, matched.split(":"))
+        if minute == 0:
+            return f"{hour} o'clock"
+        if minute < 10:
+            return f"{hour} oh {minute}"
+        return f"{hour} {minute}"
+    year = int(matched[:4])
+    remainder = year % 1000
+    if year < 1100 or remainder < 10:
+        return matched
+    head, tail = matched[:2], int(matched[2:4])
+    suffix = "s" if matched.endswith("s") else ""
+    if 100 <= remainder <= 999 and tail == 0:
+        return f"{head} hundred{suffix}"
+    if 100 <= remainder <= 999 and tail < 10:
+        return f"{head} oh {tail}{suffix}"
+    return f"{head} {tail}{suffix}"
+
+def handle_money(m: re.Match) -> str:
+    """Spell out a matched dollar or pound amount in words"""
+    raw = m.group()
+    symbol, number = raw[0], raw[1:]
+    unit = "dollar" if symbol == "$" else "pound"
+    if raw[-1].isalpha():  # ends in a magnitude word, e.g. "$3 million"
+        return f"{number} {unit}s"
+    if "." not in raw:
+        ending = "" if number == "1" else "s"
+        return f"{number} {unit}{ending}"
+    major, minor_text = number.split(".")
+    ending = "" if major == "1" else "s"
+    minor = int(minor_text.ljust(2, "0"))  # one decimal digit counts tens: ".5" is 50
+    if symbol == "$":
+        coin = "cent" if minor == 1 else "cents"
+    else:
+        coin = "penny" if minor == 1 else "pence"
+    return f"{major} {unit}{ending} and {minor} {coin}"
+
+def handle_decimal(num: re.Match) -> str:
+    """Read the digits after the point one by one, e.g. "3.14" -> "3 point 1 4\""""
+    integer_part, fraction = num.group().split(".")
+    return f"{integer_part} point {' '.join(fraction)}"
+
+def normalize_text(text: str) -> str:
+    """Normalize text for TTS processing; the rewrites below run in a fixed order
+    
+    Args:
+        text: Input text to normalize
+        
+    Returns:
+        Normalized text with leading/trailing whitespace stripped
+    """
+    # Straighten curly quotes and guillemets, then re-encode parentheses as guillemets
+    text = text.replace(chr(8216), "'").replace(chr(8217), "'")
+    text = text.replace("«", chr(8220)).replace("»", chr(8221))
+    text = text.replace(chr(8220), '"').replace(chr(8221), '"')
+    text = text.replace("(", "«").replace(")", "»")
+    
+    # Map CJK punctuation to its ASCII counterpart followed by a space
+    for a, b in zip("、。！，：；？", ",.!,:;?"):
+        text = text.replace(a, b + " ")
+    
+    # Whitespace: other whitespace -> space, collapse runs of spaces, empty out space-only lines
+    text = re.sub(r"[^\S \n]", " ", text)
+    text = re.sub(r"  +", " ", text)
+    text = re.sub(r"(?<=\n) +(?=\n)", "", text)
+    
+    # Rewrite titles (Dr./Mr./Ms./Mrs.); drop the dot of "etc." unless " <Capital>" follows
+    text = re.sub(r"\bD[Rr]\.(?= [A-Z])", "Doctor", text)
+    text = re.sub(r"\b(?:Mr\.|MR\.(?= [A-Z]))", "Mister", text)
+    text = re.sub(r"\b(?:Ms\.|MS\.(?= [A-Z]))", "Miss", text)
+    text = re.sub(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))", "Mrs", text)
+    text = re.sub(r"\betc\.(?! [A-Z])", "etc", text)
+    
+    # Respell "yeah"/"yea" as "ye'a", preserving the case of the y
+    text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text)
+    
+    # Numbers and money: years/times first, then commas between digits, currency, remaining decimals
+    text = re.sub(
+        r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)", 
+        split_num, 
+        text
+    )
+    text = re.sub(r"(?<=\d),(?=\d)", "", text)
+    text = re.sub(
+        r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b",
+        handle_money,
+        text,
+    )
+    text = re.sub(r"\d*\.\d+", handle_decimal, text)
+    
+    # Digit ranges ("5-7" -> "5 to 7"), s after capital letters, dotted initialisms -> hyphens
+    text = re.sub(r"(?<=\d)-(?=\d)", " to ", text)
+    text = re.sub(r"(?<=\d)S", " S", text)
+    text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
+    text = re.sub(r"(?<=X')S\b", "s", text)
+    text = re.sub(
+        r"(?:[A-Za-z]\.){2,} [a-z]", 
+        lambda m: m.group().replace(".", "-"), 
+        text
+    )
+    text = re.sub(r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)
+    
+    return text.strip()
diff --git a/api/src/services/text_processing/phonemizer.py b/api/src/services/text_processing/phonemizer.py
new file mode 100644
index 0000000..0d04d86
--- /dev/null
+++ b/api/src/services/text_processing/phonemizer.py
@@ -0,0 +1,97 @@
+import re
+from abc import ABC, abstractmethod
+import phonemizer
+from .normalizer import normalize_text
+
+class PhonemizerBackend(ABC):
+    """Interface every phonemization backend must implement"""
+
+    @abstractmethod
+    def phonemize(self, text: str) -> str:
+        """Turn text into its phoneme string
+
+        Args:
+            text: Text to convert to phonemes
+
+        Returns:
+            Phonemized text
+        """
+        ...
+
+class EspeakBackend(PhonemizerBackend):
+    """Espeak-based phonemizer implementation"""
+    
+    def __init__(self, language: str):
+        """Initialize espeak backend
+        
+        Args:
+            language: Language code ('en-us' or 'en-gb')
+        """
+        self.backend = phonemizer.backend.EspeakBackend(
+            language=language,
+            preserve_punctuation=True,
+            with_stress=True
+        )
+        self.language = language
+    
+    def phonemize(self, text: str) -> str:
+        """Convert text to phonemes using espeak, then apply fixed respellings
+        
+        Args:
+            text: Text to convert to phonemes
+            
+        Returns:
+            Phonemized text, stripped ("" when espeak returns nothing)
+        """
+        # Phonemize text as a one-item batch
+        ps = self.backend.phonemize([text])
+        ps = ps[0] if ps else ""
+        
+        # Respellings; the " z" rule's character class must include the curly quotes “ ”
+        ps = ps.replace("kəkˈoːɹoʊ", "kˈoʊkəɹoʊ").replace("kəkˈɔːɹəʊ", "kˈəʊkəɹəʊ")
+        ps = ps.replace("ʲ", "j").replace("r", "ɹ").replace("x", "k").replace("ɬ", "l")
+        ps = re.sub(r"(?<=[a-zɹː])(?=hˈʌndɹɪd)", " ", ps)
+        ps = re.sub(r' z(?=[;:,.!?¡¿—…"«»“” ]|$)', "z", ps)
+        
+        # Language-specific rules
+        if self.language == "en-us":
+            ps = re.sub(r"(?<=nˈaɪn)ti(?!ː)", "di", ps)
+            
+        return ps.strip()
+
+def create_phonemizer(language: str = "a") -> PhonemizerBackend:
+    """Build the phonemizer backend for a short language code
+
+    Args:
+        language: Language code ('a' for US English, 'b' for British English)
+
+    Returns:
+        Phonemizer backend instance
+
+    Raises:
+        ValueError: If the language code is not supported
+    """
+    espeak_codes = {"a": "en-us", "b": "en-gb"}
+    espeak_language = espeak_codes.get(language)
+    if espeak_language is None:
+        raise ValueError(f"Unsupported language code: {language}")
+
+    # Every call constructs a new backend; nothing is cached here.
+    return EspeakBackend(espeak_language)
+
+def phonemize(text: str, language: str = "a", normalize: bool = True) -> str:
+    """Turn text into a phoneme string, normalizing it first by default
+
+    Args:
+        text: Text to convert to phonemes
+        language: Language code ('a' for US English, 'b' for British English)
+        normalize: Whether to normalize text before phonemization
+
+    Returns:
+        Phonemized text
+    """
+    prepared = normalize_text(text) if normalize else text
+    # Named "backend" so the imported phonemizer module is not shadowed.
+    backend = create_phonemizer(language)
+    result = backend.phonemize(prepared)
+    return result
diff --git a/api/src/services/text_processing/vocabulary.py b/api/src/services/text_processing/vocabulary.py
new file mode 100644
index 0000000..66af961
--- /dev/null
+++ b/api/src/services/text_processing/vocabulary.py
@@ -0,0 +1,37 @@
+def get_vocab():
+    """Get the vocabulary dictionary mapping characters to token IDs"""
+    _pad = "$"
+    _punctuation = ';:,.!?¡¿—…"«»“” '  # “ and ” stay curly: straight quotes here would collide with '"'
+    _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+    _letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
+    
+    # Token IDs are positions in this list; a repeated symbol keeps its last position
+    symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
+    return {symbol: i for i, symbol in enumerate(symbols)}
+
+# Symbol -> token ID table, built once at import and used by tokenize/decode_tokens
+VOCAB = get_vocab()
+
+def tokenize(phonemes: str) -> list[int]:
+    """Look up the token ID of every phoneme symbol, skipping unknown symbols
+
+    Args:
+        phonemes: String of phonemes to tokenize
+
+    Returns:
+        List of token IDs
+    """
+    return [VOCAB[symbol] for symbol in phonemes if symbol in VOCAB]
+
+def decode_tokens(tokens: list[int]) -> str:
+    """Map token IDs back to their symbols and join them into one string
+
+    Args:
+        tokens: List of token IDs
+
+    Returns:
+        String of phonemes
+    """
+    # VOCAB is inverted on each call; an ID missing from it raises KeyError
+    symbols = dict(zip(VOCAB.values(), VOCAB.keys()))
+    return "".join([symbols[token] for token in tokens])
diff --git a/api/src/services/tts.py b/api/src/services/tts.py
deleted file mode 100644
index c1abd9f..0000000
--- a/api/src/services/tts.py
+++ /dev/null
@@ -1,286 +0,0 @@
-import io
-import os
-import re
-import time
-import threading
-from typing import List, Tuple, Optional
-
-import numpy as np
-import torch
-import tiktoken
-import scipy.io.wavfile as wavfile
-from kokoro import generate, tokenize, phonemize, normalize_text
-from loguru import logger
-from models import build_model
-
-from ..core.config import settings
-
-enc = tiktoken.get_encoding("cl100k_base")
-
-
-class TTSModel:
-    _instance = None
-    _device = None
-    _lock = threading.Lock()
-
-    # Directory for all voices (copied base voices, and any created combined voices)
-    VOICES_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "voices")
-
-    @classmethod
-    def initialize(cls):
-        """Initialize and warm up the model"""
-        with cls._lock:
-            if cls._instance is None:
-                # Initialize model
-                cls._device = "cuda" if torch.cuda.is_available() else "cpu"
-                logger.info(f"Initializing model on {cls._device}")
-                model_path = os.path.join(settings.model_dir, settings.model_path)
-                model = build_model(model_path, cls._device)
-                cls._instance = model
-
-                # Ensure voices directory exists
-                os.makedirs(cls.VOICES_DIR, exist_ok=True)
-
-                # Copy base voices to local directory
-                base_voices_dir = os.path.join(settings.model_dir, settings.voices_dir)
-                if os.path.exists(base_voices_dir):
-                    for file in os.listdir(base_voices_dir):
-                        if file.endswith(".pt"):
-                            voice_name = file[:-3]
-                            voice_path = os.path.join(cls.VOICES_DIR, file)
-                            if not os.path.exists(voice_path):
-                                try:
-                                    logger.info(
-                                        f"Copying base voice {voice_name} to voices directory"
-                                    )
-                                    base_path = os.path.join(base_voices_dir, file)
-                                    voicepack = torch.load(
-                                        base_path,
-                                        map_location=cls._device,
-                                        weights_only=True,
-                                    )
-                                    torch.save(voicepack, voice_path)
-                                except Exception as e:
-                                    logger.error(
-                                        f"Error copying voice {voice_name}: {str(e)}"
-                                    )
-
-                # Warm up with default voice
-                try:
-                    dummy_text = "Hello"
-                    voice_path = os.path.join(cls.VOICES_DIR, "af.pt")
-                    dummy_voicepack = torch.load(
-                        voice_path, map_location=cls._device, weights_only=True
-                    )
-                    generate(model, dummy_text, dummy_voicepack, lang="a", speed=1.0)
-                    logger.info("Model warm-up complete")
-                except Exception as e:
-                    logger.warning(f"Model warm-up failed: {e}")
-
-            # Count voices in directory for validation
-            voice_count = len(
-                [f for f in os.listdir(cls.VOICES_DIR) if f.endswith(".pt")]
-            )
-            return cls._instance, voice_count
-
-    @classmethod
-    def get_instance(cls):
-        """Get the initialized instance or raise an error"""
-        if cls._instance is None:
-            raise RuntimeError("Model not initialized. Call initialize() first.")
-        return cls._instance, cls._device
-
-
-class TTSService:
-    def __init__(self, output_dir: str = None, start_worker: bool = False):
-        self.output_dir = output_dir
-        self._ensure_voices()
-        if start_worker:
-            self.start_worker()
-
-    def _ensure_voices(self):
-        """Copy base voices to local voices directory during initialization"""
-        os.makedirs(TTSModel.VOICES_DIR, exist_ok=True)
-
-        base_voices_dir = os.path.join(settings.model_dir, settings.voices_dir)
-        if os.path.exists(base_voices_dir):
-            for file in os.listdir(base_voices_dir):
-                if file.endswith(".pt"):
-                    voice_name = file[:-3]
-                    voice_path = os.path.join(TTSModel.VOICES_DIR, file)
-                    if not os.path.exists(voice_path):
-                        try:
-                            logger.info(
-                                f"Copying base voice {voice_name} to voices directory"
-                            )
-                            base_path = os.path.join(base_voices_dir, file)
-                            voicepack = torch.load(
-                                base_path,
-                                map_location=TTSModel._device,
-                                weights_only=True,
-                            )
-                            torch.save(voicepack, voice_path)
-                        except Exception as e:
-                            logger.error(f"Error copying voice {voice_name}: {str(e)}")
-
-    def _split_text(self, text: str) -> List[str]:
-        """Split text into sentences"""
-        return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
-
-    def _get_voice_path(self, voice_name: str) -> Optional[str]:
-        """Get the path to a voice file.
-
-        Args:
-            voice_name: Name of the voice to find
-
-        Returns:
-            Path to the voice file if found, None otherwise
-        """
-        voice_path = os.path.join(TTSModel.VOICES_DIR, f"{voice_name}.pt")
-        return voice_path if os.path.exists(voice_path) else None
-
-    def _generate_audio(
-        self, text: str, voice: str, speed: float, stitch_long_output: bool = True
-    ) -> Tuple[torch.Tensor, float]:
-        """Generate audio and measure processing time"""
-        start_time = time.time()
-
-        try:
-            # Normalize text once at the start
-            text = normalize_text(text)
-            if not text:
-                raise ValueError("Text is empty after preprocessing")
-
-            # Check voice exists
-            voice_path = self._get_voice_path(voice)
-            if not voice_path:
-                raise ValueError(f"Voice not found: {voice}")
-
-            # Load model and voice
-            model = TTSModel._instance
-            voicepack = torch.load(
-                voice_path, map_location=TTSModel._device, weights_only=True
-            )
-
-            # Generate audio with or without stitching
-            if stitch_long_output:
-                chunks = self._split_text(text)
-                audio_chunks = []
-
-                # Process all chunks with same model/voicepack instance
-                for i, chunk in enumerate(chunks):
-                    try:
-                        # Validate phonemization first
-                        # ps = phonemize(chunk, voice[0])
-                        # tokens = tokenize(ps)
-                        # logger.debug(
-                        #     f"Processing chunk {i + 1}/{len(chunks)}: {len(tokens)} tokens"
-                        # )
-
-                        # Only proceed if phonemization succeeded
-                        chunk_audio, _ = generate(
-                            model, chunk, voicepack, lang=voice[0], speed=speed
-                        )
-                        if chunk_audio is not None:
-                            audio_chunks.append(chunk_audio)
-                        else:
-                            logger.error(
-                                f"No audio generated for chunk {i + 1}/{len(chunks)}"
-                            )
-                    except Exception as e:
-                        logger.error(
-                            f"Failed to generate audio for chunk {i + 1}/{len(chunks)}: '{chunk}'. Error: {str(e)}"
-                        )
-                        continue
-
-                if not audio_chunks:
-                    raise ValueError("No audio chunks were generated successfully")
-
-                audio = (
-                    np.concatenate(audio_chunks)
-                    if len(audio_chunks) > 1
-                    else audio_chunks[0]
-                )
-            else:
-                audio, _ = generate(model, text, voicepack, lang=voice[0], speed=speed)
-
-            processing_time = time.time() - start_time
-            return audio, processing_time
-
-        except Exception as e:
-            print(f"Error in audio generation: {str(e)}")
-            raise
-
-    def _save_audio(self, audio: torch.Tensor, filepath: str):
-        """Save audio to file"""
-        os.makedirs(os.path.dirname(filepath), exist_ok=True)
-        wavfile.write(filepath, 24000, audio)
-
-    def _audio_to_bytes(self, audio: torch.Tensor) -> bytes:
-        """Convert audio tensor to WAV bytes"""
-        buffer = io.BytesIO()
-        wavfile.write(buffer, 24000, audio)
-        return buffer.getvalue()
-
-    def combine_voices(self, voices: List[str]) -> str:
-        """Combine multiple voices into a new voice.
-
-        Args:
-            voices: List of voice names to combine
-
-        Returns:
-            Name of the combined voice
-
-        Raises:
-            ValueError: If less than 2 voices provided or voice loading fails
-            RuntimeError: If voice combination or saving fails
-        """
-        if len(voices) < 2:
-            raise ValueError("At least 2 voices are required for combination")
-
-        # Load voices
-        t_voices: List[torch.Tensor] = []
-        v_name: List[str] = []
-
-        for voice in voices:
-            try:
-                voice_path = os.path.join(TTSModel.VOICES_DIR, f"{voice}.pt")
-                voicepack = torch.load(
-                    voice_path, map_location=TTSModel._device, weights_only=True
-                )
-                t_voices.append(voicepack)
-                v_name.append(voice)
-            except Exception as e:
-                raise ValueError(f"Failed to load voice {voice}: {str(e)}")
-
-        # Combine voices
-        try:
-            f: str = "_".join(v_name)
-            v = torch.mean(torch.stack(t_voices), dim=0)
-            combined_path = os.path.join(TTSModel.VOICES_DIR, f"{f}.pt")
-
-            # Save combined voice
-            try:
-                torch.save(v, combined_path)
-            except Exception as e:
-                raise RuntimeError(
-                    f"Failed to save combined voice to {combined_path}: {str(e)}"
-                )
-
-            return f
-
-        except Exception as e:
-            if not isinstance(e, (ValueError, RuntimeError)):
-                raise RuntimeError(f"Error combining voices: {str(e)}")
-            raise
-
-    def list_voices(self) -> List[str]:
-        """List all available voices"""
-        voices = []
-        try:
-            for file in os.listdir(TTSModel.VOICES_DIR):
-                if file.endswith(".pt"):
-                    voices.append(file[:-3])  # Remove .pt extension
-        except Exception as e:
-            logger.error(f"Error listing voices: {str(e)}")
-        return sorted(voices)
diff --git a/api/src/services/tts_base.py b/api/src/services/tts_base.py
new file mode 100644
index 0000000..f502373
--- /dev/null
+++ b/api/src/services/tts_base.py
@@ -0,0 +1,136 @@
+import os
+import threading
+from abc import ABC, abstractmethod
+from typing import List, Tuple
+import torch
+import numpy as np
+from loguru import logger
+
+from ..core.config import settings
+
+class TTSBaseModel(ABC):
+    _instance = None
+    _lock = threading.Lock()
+    _device = None
+    VOICES_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "voices")
+
+    @classmethod
+    def setup(cls):
+        """Pick a device, initialize the model, copy base voices and warm up.
+
+        Returns the number of ``.pt`` voices; raises RuntimeError if init fails.
+        """
+        with cls._lock:
+            # Set device
+            cuda_available = torch.cuda.is_available()
+            logger.info(f"CUDA available: {cuda_available}")
+            if cuda_available:
+                try:
+                    torch.zeros(1).cuda()  # Test CUDA device
+                    logger.info("CUDA test successful")
+                    model_path = os.path.join(settings.model_dir, settings.pytorch_model_path)
+                    cls._device = "cuda"
+                except Exception as e:
+                    logger.error(f"CUDA test failed: {e}")
+                    cls._device = "cpu"  # fall back; model_path must still be bound
+                    model_path = os.path.join(settings.model_dir, settings.onnx_model_path)
+            else:
+                cls._device = "cpu"
+                model_path = os.path.join(settings.model_dir, settings.onnx_model_path)
+            logger.info(f"Initializing model on {cls._device}")
+
+            # Initialize model
+            if not cls.initialize(settings.model_dir, model_path=model_path):
+                raise RuntimeError(f"Failed to initialize {cls._device.upper()} model")
+
+            # Setup voices directory
+            os.makedirs(cls.VOICES_DIR, exist_ok=True)
+
+            # Copy base voices to local directory
+            base_voices_dir = os.path.join(settings.model_dir, settings.voices_dir)
+            if os.path.exists(base_voices_dir):
+                for file in os.listdir(base_voices_dir):
+                    if file.endswith(".pt"):
+                        voice_name = file[:-3]
+                        voice_path = os.path.join(cls.VOICES_DIR, file)
+                        if not os.path.exists(voice_path):
+                            try:
+                                logger.info(f"Copying base voice {voice_name} to voices directory")
+                                base_path = os.path.join(base_voices_dir, file)
+                                voicepack = torch.load(base_path, map_location=cls._device, weights_only=True)
+                                torch.save(voicepack, voice_path)
+                            except Exception as e:
+                                logger.error(f"Error copying voice {voice_name}: {str(e)}")
+
+            # Warm up with default voice
+            try:
+                dummy_text = "Hello"
+                voice_path = os.path.join(cls.VOICES_DIR, "af.pt")
+                dummy_voicepack = torch.load(voice_path, map_location=cls._device, weights_only=True)
+                # Process text and generate audio
+                _, tokens = cls.process_text(dummy_text, "a")
+                cls.generate_from_tokens(tokens, dummy_voicepack, 1.0)
+                logger.info("Model warm-up complete")
+            except Exception as e:
+                logger.warning(f"Model warm-up failed: {e}")
+
+            # Count voices in directory
+            return len([f for f in os.listdir(cls.VOICES_DIR) if f.endswith(".pt")])
+
+    @classmethod
+    @abstractmethod
+    def initialize(cls, model_dir: str, model_path: str = None):
+        """Load the model; setup() treats a falsy return value as failure"""
+        ...
+
+    @classmethod
+    @abstractmethod
+    def process_text(cls, text: str, language: str) -> Tuple[str, List[int]]:
+        """Convert text to a phoneme string and its token IDs
+
+        Args:
+            text: Text to convert
+            language: Language code
+
+        Returns:
+            Tuple[str, List[int]]: Phonemes and token IDs
+        """
+        ...
+
+    @classmethod
+    @abstractmethod
+    def generate_from_text(cls, text: str, voicepack: torch.Tensor, language: str, speed: float) -> Tuple[np.ndarray, str]:
+        """Synthesize audio directly from raw text
+
+        Args:
+            text: Input text
+            voicepack: Voice tensor
+            language: Language code
+            speed: Speed factor
+
+        Returns:
+            Tuple[np.ndarray, str]: Generated audio samples and phonemes
+        """
+        ...
+
+    @classmethod
+    @abstractmethod
+    def generate_from_tokens(cls, tokens: List[int], voicepack: torch.Tensor, speed: float) -> np.ndarray:
+        """Synthesize audio from already-tokenized input
+
+        Args:
+            tokens: Token IDs
+            voicepack: Voice tensor
+            speed: Speed factor
+
+        Returns:
+            np.ndarray: Generated audio samples
+        """
+        ...
+
+    @classmethod
+    def get_device(cls):
+        """Return the device string, or raise RuntimeError while it is still unset"""
+        if cls._device is not None:
+            return cls._device
+        raise RuntimeError("Model not initialized. Call setup() first.")
diff --git a/api/src/services/tts_cpu.py b/api/src/services/tts_cpu.py
new file mode 100644
index 0000000..0436a24
--- /dev/null
+++ b/api/src/services/tts_cpu.py
@@ -0,0 +1,144 @@
+import os
+import numpy as np
+import torch
+from onnxruntime import InferenceSession, SessionOptions, GraphOptimizationLevel, ExecutionMode
+from loguru import logger
+
+from .tts_base import TTSBaseModel
+from .text_processing import phonemize, tokenize
+from ..core.config import settings
+
+class TTSCPUModel(TTSBaseModel):
+    _instance = None
+    _onnx_session = None
+
+    @classmethod
+    def initialize(cls, model_dir: str, model_path: str = None):
+        """Create the shared ONNX Runtime session once and return it
+
+        model_path is accepted for interface parity but not read; the file is
+        located by joining model_dir with settings.onnx_model_path. Returns the
+        cached InferenceSession, or None if the model file is missing.
+        """
+        if cls._onnx_session is not None:
+            return cls._onnx_session
+
+        onnx_path = os.path.join(model_dir, settings.onnx_model_path)
+        if not os.path.exists(onnx_path):
+            logger.error(f"ONNX model not found at {onnx_path}")
+            return None
+        logger.info(f"Loading ONNX model from {onnx_path}")
+
+        # Configure ONNX session for optimal performance
+        session_options = SessionOptions()
+
+        # Set optimization level (anything other than "all"/"basic" disables it)
+        if settings.onnx_optimization_level == "all":
+            session_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
+        elif settings.onnx_optimization_level == "basic":
+            session_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
+        else:
+            session_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
+
+        # Configure threading
+        session_options.intra_op_num_threads = settings.onnx_num_threads
+        session_options.inter_op_num_threads = settings.onnx_inter_op_threads
+
+        # Set execution mode (sequential unless "parallel" is configured)
+        session_options.execution_mode = (
+            ExecutionMode.ORT_PARALLEL
+            if settings.onnx_execution_mode == "parallel"
+            else ExecutionMode.ORT_SEQUENTIAL
+        )
+
+        # Enable/disable memory pattern optimization
+        session_options.enable_mem_pattern = settings.onnx_memory_pattern
+
+        # Configure CPU provider options
+        # NOTE(review): options are nested under the provider name here; confirm
+        # this matches the shape onnxruntime expects for provider_options.
+        provider_options = {
+            'CPUExecutionProvider': {
+                'arena_extend_strategy': settings.onnx_arena_extend_strategy,
+                'cpu_memory_arena_cfg': 'cpu:0'
+            }
+        }
+
+        cls._onnx_session = InferenceSession(
+            onnx_path,
+            sess_options=session_options,
+            providers=['CPUExecutionProvider'],
+            provider_options=[provider_options]
+        )
+        return cls._onnx_session
+
+    @classmethod
+    def process_text(cls, text: str, language: str) -> tuple[str, list[int]]:
+        """Phonemize and tokenize text, wrapping the token list in 0 on both ends
+
+        Args:
+            text: Input text
+            language: Language code
+
+        Returns:
+            tuple[str, list[int]]: Phonemes and the 0-wrapped token IDs
+        """
+        phoneme_str = phonemize(text, language)
+        token_ids = tokenize(phoneme_str)
+        # Add start/end tokens
+        return phoneme_str, [0] + token_ids + [0]
+
+    @classmethod
+    def generate_from_text(cls, text: str, voicepack: torch.Tensor, language: str, speed: float) -> tuple[np.ndarray, str]:
+        """Synthesize audio for raw text and also return its phonemes
+
+        Args:
+            text: Input text
+            voicepack: Voice tensor
+            language: Language code
+            speed: Speed factor
+
+        Returns:
+            tuple[np.ndarray, str]: Generated audio samples and phonemes
+
+        Raises:
+            RuntimeError: If the ONNX session has not been created
+        """
+        if cls._onnx_session is None:
+            raise RuntimeError("ONNX model not initialized")
+
+        # Text -> tokens -> audio; the phonemes are handed back with the audio
+        phonemes, token_ids = cls.process_text(text, language)
+        samples = cls.generate_from_tokens(token_ids, voicepack, speed)
+        return samples, phonemes
+
+    @classmethod
+    def generate_from_tokens(cls, tokens: list[int], voicepack: torch.Tensor, speed: float) -> np.ndarray:
+        """Run the ONNX session on token IDs and return its first output
+
+        Args:
+            tokens: Token IDs
+            voicepack: Voice tensor, indexed at len(tokens) - 2 for the style input
+            speed: Speed factor
+
+        Returns:
+            np.ndarray: Generated audio samples
+
+        Raises:
+            RuntimeError: If the ONNX session has not been created
+        """
+        session = cls._onnx_session
+        if session is None:
+            raise RuntimeError("ONNX model not initialized")
+
+        # Build the three named inputs: a single-row int64 batch of tokens, the
+        # style row picked by token count, and the speed as a 1-element float32
+        feeds = {
+            'tokens': np.array([tokens], dtype=np.int64),
+            'style': voicepack[len(tokens) - 2].numpy(),
+            'speed': np.full(1, speed, dtype=np.float32),
+        }
+
+        # Passing None as the output list requests every model output
+        outputs = session.run(None, feeds)
+        return outputs[0]
diff --git a/api/src/services/tts_gpu.py b/api/src/services/tts_gpu.py
new file mode 100644
index 0000000..300d141
--- /dev/null
+++ b/api/src/services/tts_gpu.py
@@ -0,0 +1,127 @@
+import os
+import numpy as np
+import torch
+from loguru import logger
+from models import build_model
+from .text_processing import phonemize, tokenize
+
+from .tts_base import TTSBaseModel
+from ..core.config import settings
+
+@torch.no_grad()
+def forward(model, tokens, ref_s, speed):
+    """Run the model on token IDs with style ref_s; return the audio as a NumPy array"""
+    device = ref_s.device
+    tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)  # wrap the IDs in 0 at both ends
+    input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
+    text_mask = length_to_mask(input_lengths).to(device)
+    bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
+    d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
+    s = ref_s[:, 128:]  # columns 128+ go to the predictor; columns <128 go to the decoder below
+    d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
+    x, _ = model.predictor.lstm(d)
+    duration = model.predictor.duration_proj(x)
+    duration = torch.sigmoid(duration).sum(axis=-1) / speed  # larger speed -> shorter durations
+    pred_dur = torch.round(duration).clamp(min=1).long()  # at least one frame per token
+    pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item())
+    c_frame = 0
+    for i in range(pred_aln_trg.size(0)):  # row i gets ones across token i's frame span
+        pred_aln_trg[i, c_frame : c_frame + pred_dur[0, i].item()] = 1
+        c_frame += pred_dur[0, i].item()
+    en = d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device)
+    F0_pred, N_pred = model.predictor.F0Ntrain(en, s)
+    t_en = model.text_encoder(tokens, input_lengths, text_mask)
+    asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
+    return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
+
+def length_to_mask(lengths):
+    """Build a boolean mask that is True past each sequence's length
+
+    Row i has one entry per position up to lengths.max(); entry p is True
+    when p + 1 > lengths[i].
+    """
+    batch_size = lengths.shape[0]
+    positions = torch.arange(lengths.max()).unsqueeze(0)
+    positions = positions.expand(batch_size, -1).type_as(lengths)
+    return torch.gt(positions + 1, lengths.unsqueeze(1))
+
+class TTSGPUModel(TTSBaseModel):
+    _instance = None
+    _device = "cuda"
+
+    @classmethod
+    def initialize(cls, model_dir: str, model_path: str = None):
+        """Build the PyTorch model once and return it (None when no model was built)
+
+        model_path is optional, matching the base signature, and is not read.
+        """
+        if cls._instance is None and torch.cuda.is_available():
+            try:
+                logger.info("Initializing GPU model")
+                weights_path = os.path.join(model_dir, settings.pytorch_model_path)
+                cls._instance = build_model(weights_path, cls._device)
+            except Exception as e:
+                logger.error(f"Failed to initialize GPU model: {e}")
+        return cls._instance
+
+    @classmethod
+    def process_text(cls, text: str, language: str) -> tuple[str, list[int]]:
+        """Phonemize text and tokenize the resulting phonemes
+
+        Args:
+            text: Input text
+            language: Language code
+
+        Returns:
+            tuple[str, list[int]]: Phonemes and token IDs
+        """
+        phoneme_str = phonemize(text, language)
+        # forward() wraps the IDs in 0 itself, so they are returned as-is
+        return phoneme_str, tokenize(phoneme_str)
+
+    @classmethod
+    def generate_from_text(cls, text: str, voicepack: torch.Tensor, language: str, speed: float) -> tuple[np.ndarray, str]:
+        """Synthesize audio for raw text and also return its phonemes
+
+        Args:
+            text: Input text
+            voicepack: Voice tensor
+            language: Language code
+            speed: Speed factor
+
+        Returns:
+            tuple[np.ndarray, str]: Generated audio samples and phonemes
+
+        Raises:
+            RuntimeError: If the GPU model has not been initialized
+        """
+        if cls._instance is None:
+            raise RuntimeError("GPU model not initialized")
+
+        # Text -> tokens -> audio; the phonemes are handed back with the audio
+        phonemes, token_ids = cls.process_text(text, language)
+        samples = cls.generate_from_tokens(token_ids, voicepack, speed)
+        return samples, phonemes
+
+    @classmethod
+    def generate_from_tokens(cls, tokens: list[int], voicepack: torch.Tensor, speed: float) -> np.ndarray:
+        """Generate audio from token IDs with the PyTorch model
+
+        Args:
+            tokens: Token IDs
+            voicepack: Voice tensor, indexed at len(tokens) to pick the style
+            speed: Speed factor
+
+        Returns:
+            np.ndarray: Generated audio samples
+
+        Raises:
+            RuntimeError: If the GPU model has not been initialized
+        """
+        model = cls._instance
+        if model is None:
+            raise RuntimeError("GPU model not initialized")
+
+        # The reference style is selected by the number of tokens
+        style_ref = voicepack[len(tokens)]
+        return forward(model, tokens, style_ref, speed)
diff --git a/api/src/services/tts_model.py b/api/src/services/tts_model.py
new file mode 100644
index 0000000..1e04939
--- /dev/null
+++ b/api/src/services/tts_model.py
@@ -0,0 +1,8 @@
+import torch
+
+if torch.cuda.is_available():
+    from .tts_gpu import TTSGPUModel as TTSModel
+else:
+    from .tts_cpu import TTSCPUModel as TTSModel
+
+__all__ = ["TTSModel"]
diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py
new file mode 100644
index 0000000..6d763fe
--- /dev/null
+++ b/api/src/services/tts_service.py
@@ -0,0 +1,161 @@
+import io
+import os
+import re
+import time
+from typing import List, Tuple, Optional
+
+import numpy as np
+import torch
+import scipy.io.wavfile as wavfile
+from .text_processing import normalize_text
+from loguru import logger
+
+from ..core.config import settings
+from .tts_model import TTSModel
+
+
+class TTSService:
+    def __init__(self, output_dir: Optional[str] = None):
+        self.output_dir = output_dir  # stored as given; no other method of this class reads it
+
+    def _split_text(self, text: str) -> List[str]:
+        """Break text into trimmed, non-empty sentences after '.', '!' or '?'"""
+        source = text if isinstance(text, str) else ("" if text is None else str(text))
+        pieces = (piece.strip() for piece in re.split(r"(?<=[.!?])\s+", source))
+        return [piece for piece in pieces if piece]
+
+    def _get_voice_path(self, voice_name: str) -> Optional[str]:
+        """Return the voice's .pt path under VOICES_DIR, or None if no such file exists"""
+        candidate = os.path.join(TTSModel.VOICES_DIR, f"{voice_name}.pt")
+        return None if not os.path.exists(candidate) else candidate
+
+    def _generate_audio(
+        self, text: str, voice: str, speed: float, stitch_long_output: bool = True
+    ) -> Tuple[torch.Tensor, float]:
+        """Synthesize text with the named voice and time the whole operation
+
+        The first character of voice is used as the language code. With
+        stitch_long_output the text is split into sentences that are synthesized
+        one by one and joined; a failing sentence is logged and skipped.
+
+        Returns:
+            The audio returned by the model and the elapsed seconds
+
+        Raises:
+            ValueError: Empty text, unknown voice, or no chunk produced audio
+        """
+        started_at = time.time()
+        try:
+            # Normalize text once at the start
+            if not text:
+                raise ValueError("Text is empty after preprocessing")
+            normalized = normalize_text(text)
+            if not normalized:
+                raise ValueError("Text is empty after preprocessing")
+            text = str(normalized)
+
+            # Check voice exists
+            voice_path = self._get_voice_path(voice)
+            if not voice_path:
+                raise ValueError(f"Voice not found: {voice}")
+
+            # Load voice
+            voicepack = torch.load(
+                voice_path, map_location=TTSModel.get_device(), weights_only=True
+            )
+
+            if not stitch_long_output:
+                # Process single chunk
+                _, tokens = TTSModel.process_text(text, voice[0])
+                audio = TTSModel.generate_from_tokens(tokens, voicepack, speed)
+                return audio, time.time() - started_at
+
+            sentences = self._split_text(text)
+            total = len(sentences)
+            audio_chunks = []
+            for number, sentence in enumerate(sentences, start=1):
+                try:
+                    _, tokens = TTSModel.process_text(sentence, voice[0])
+                    chunk_audio = TTSModel.generate_from_tokens(tokens, voicepack, speed)
+                except Exception as e:
+                    logger.error(
+                        f"Failed to generate audio for chunk {number}/{total}: '{sentence}'. Error: {str(e)}"
+                    )
+                    continue
+                if chunk_audio is not None:
+                    audio_chunks.append(chunk_audio)
+                else:
+                    logger.error(f"No audio generated for chunk {number}/{total}")
+
+            if not audio_chunks:
+                raise ValueError("No audio chunks were generated successfully")
+
+            audio = audio_chunks[0] if len(audio_chunks) == 1 else np.concatenate(audio_chunks)
+            return audio, time.time() - started_at
+
+        except Exception as e:
+            logger.error(f"Error in audio generation: {str(e)}")
+            raise
+
+    def _save_audio(self, audio: torch.Tensor, filepath: str):
+        """Write audio to filepath as a 24 kHz WAV, creating parent directories first"""
+        os.makedirs(os.path.dirname(filepath) or ".", exist_ok=True)  # dirname is "" for a bare filename
+        wavfile.write(filepath, 24000, audio)
+
+    def _audio_to_bytes(self, audio: torch.Tensor) -> bytes:
+        """Serialize audio samples as 24 kHz WAV data held in memory"""
+        with io.BytesIO() as wav_buffer:
+            wavfile.write(wav_buffer, 24000, audio)
+            return wav_buffer.getvalue()
+
+    def combine_voices(self, voices: List[str]) -> str:
+        """Average the named voice tensors and save the result as a new voice
+
+        The new voice is named by joining the inputs with "_" and is written to
+        VOICES_DIR; that name is returned. Raises ValueError for fewer than two
+        voices or an unloadable voice, RuntimeError if combining or saving fails.
+        """
+        if len(voices) < 2:
+            raise ValueError("At least 2 voices are required for combination")
+
+        # Load every voice up front so a bad name fails before anything is saved
+        tensors: List[torch.Tensor] = []
+        for voice in voices:
+            try:
+                source_path = os.path.join(TTSModel.VOICES_DIR, f"{voice}.pt")
+                tensors.append(
+                    torch.load(source_path, map_location=TTSModel.get_device(), weights_only=True)
+                )
+            except Exception as e:
+                raise ValueError(f"Failed to load voice {voice}: {str(e)}")
+
+        # Combine voices
+        try:
+            combined_name = "_".join(voices)
+            averaged = torch.mean(torch.stack(tensors), dim=0)
+            combined_path = os.path.join(TTSModel.VOICES_DIR, f"{combined_name}.pt")
+
+            # Save combined voice
+            try:
+                torch.save(averaged, combined_path)
+            except Exception as e:
+                raise RuntimeError(
+                    f"Failed to save combined voice to {combined_path}: {str(e)}"
+                )
+            return combined_name
+        except (ValueError, RuntimeError):
+            # Already one of the documented error types: pass through untouched
+            raise
+        except Exception as e:
+            raise RuntimeError(f"Error combining voices: {str(e)}")
+
+    def list_voices(self) -> List[str]:
+        """Return the sorted names of the .pt voice files found in VOICES_DIR"""
+        # Read the directory first; a failure is logged and treated as "no voices"
+        try:
+            entries = os.listdir(TTSModel.VOICES_DIR)
+        except Exception as e:
+            logger.error(f"Error listing voices: {str(e)}")
+            entries = []
+        # Remove .pt extension
+        return sorted(name[:-3] for name in entries if name.endswith(".pt"))
diff --git a/api/src/structures/text_schemas.py b/api/src/structures/text_schemas.py
new file mode 100644
index 0000000..5ae1b08
--- /dev/null
+++ b/api/src/structures/text_schemas.py
@@ -0,0 +1,9 @@
+from pydantic import BaseModel
+
+class PhonemeRequest(BaseModel):
+    text: str  # no default, so callers must supply it
+    language: str = "a"  # Default to American English
+
+class PhonemeResponse(BaseModel):
+    phonemes: str  # presumably the phonemized form of the request text (confirm against the endpoint)
+    tokens: list[int]  # presumably the token IDs for those phonemes (confirm against the endpoint)
diff --git a/api/tests/conftest.py b/api/tests/conftest.py
index c41172f..fba270b 100644
--- a/api/tests/conftest.py
+++ b/api/tests/conftest.py
@@ -21,8 +21,73 @@ def cleanup():
     cleanup_mock_dirs()
 
 
-# Mock torch and other ML modules before they're imported
-sys.modules["torch"] = Mock()
+# Create mock torch module
+mock_torch = Mock()
+mock_torch.cuda = Mock()
+mock_torch.cuda.is_available = Mock(return_value=False)
+
+# Create a mock tensor class that supports basic operations
+class MockTensor:  # lightweight stand-in for tensors, used by the mocked torch module below
+    def __init__(self, data):
+        self.data = data
+        if isinstance(data, (list, tuple)):
+            self.shape = [len(data)]  # one-dimensional: only the outer length is recorded
+        elif isinstance(data, MockTensor):
+            self.shape = data.shape
+        else:
+            self.shape = getattr(data, 'shape', [1])  # scalars and unknown objects default to [1]
+        
+    def __getitem__(self, idx):
+        if isinstance(self.data, (list, tuple)):
+            if isinstance(idx, slice):
+                return MockTensor(self.data[idx])
+            return self.data[idx]  # a plain element, not wrapped in MockTensor
+        return self  # non-sequence data ignores the index entirely
+        
+    def max(self):
+        if isinstance(self.data, (list, tuple)):
+            max_val = max(self.data)
+            return MockTensor(max_val)
+        return 5  # Default for testing
+        
+    def item(self):
+        if isinstance(self.data, (list, tuple)):
+            return max(self.data)  # sequences report their largest element
+        if isinstance(self.data, (int, float)):
+            return self.data
+        return 5  # Default for testing
+        
+    def cuda(self):
+        """Support cuda conversion"""
+        return self
+        
+    def any(self):
+        if isinstance(self.data, (list, tuple)):
+            return any(self.data)
+        return False  # non-sequence data: fixed answer
+        
+    def all(self):
+        if isinstance(self.data, (list, tuple)):
+            return all(self.data)
+        return True  # non-sequence data: fixed answer
+        
+    def unsqueeze(self, dim):
+        return self  # shape-changing calls are no-ops on the mock
+        
+    def expand(self, *args):
+        return self  # shape-changing calls are no-ops on the mock
+        
+    def type_as(self, other):
+        return self  # dtype is not modelled
+
+# Add tensor operations to mock torch; each returns a MockTensor rather than a real tensor
+mock_torch.tensor = lambda x: MockTensor(x)
+mock_torch.zeros = lambda *args: MockTensor([0] * (args[0] if isinstance(args[0], int) else args[0][0]))  # only the first dimension is used
+mock_torch.arange = lambda x: MockTensor(list(range(x)))
+mock_torch.gt = lambda x, y: MockTensor([False] * x.shape[0])  # always all-False; y is ignored
+
+# Mock modules before they're imported
+sys.modules["torch"] = mock_torch
 sys.modules["transformers"] = Mock()
 sys.modules["phonemizer"] = Mock()
 sys.modules["models"] = Mock()
@@ -31,14 +96,22 @@ sys.modules["kokoro"] = Mock()
 sys.modules["kokoro.generate"] = Mock()
 sys.modules["kokoro.phonemize"] = Mock()
 sys.modules["kokoro.tokenize"] = Mock()
+sys.modules["onnxruntime"] = Mock()
 
 
 @pytest.fixture(autouse=True)
 def mock_tts_model():
-    """Mock TTSModel to avoid loading real models during tests"""
-    with patch("api.src.services.tts.TTSModel") as mock:
+    """Mock TTSModel and TTS model initialization"""
+    with patch("api.src.services.tts_model.TTSModel") as mock_tts_model, \
+         patch("api.src.services.tts_base.TTSBaseModel") as mock_base_model:
+        
+        # Mock TTSModel
         model_instance = Mock()
         model_instance.get_instance.return_value = model_instance
         model_instance.get_voicepack.return_value = None
-        mock.get_instance.return_value = model_instance
+        mock_tts_model.get_instance.return_value = model_instance
+        
+        # Mock TTS model initialization
+        mock_base_model.setup.return_value = 1  # Return dummy voice count
+        
         yield model_instance
diff --git a/api/tests/test_main.py b/api/tests/test_main.py
index 5b23749..c6a972e 100644
--- a/api/tests/test_main.py
+++ b/api/tests/test_main.py
@@ -26,13 +26,11 @@ def test_health_check(test_client):
 @patch("api.src.main.logger")
 async def test_lifespan_successful_warmup(mock_logger, mock_tts_model):
     """Test successful model warmup in lifespan"""
-    # Mock the model initialization with model info and voicepack count
-    mock_model = MagicMock()
     # Mock file system for voice counting
     mock_tts_model.VOICES_DIR = "/mock/voices"
     with patch("os.listdir", return_value=["voice1.pt", "voice2.pt", "voice3.pt"]):
-        mock_tts_model.initialize.return_value = (mock_model, 3)  # 3 voice files
-        mock_tts_model._device = "cuda"  # Set device class variable
+        mock_tts_model.setup.return_value = 3  # 3 voice files
+        mock_tts_model.get_device.return_value = "cuda"
 
     # Create an async generator from the lifespan context manager
     async_gen = lifespan(MagicMock())
@@ -44,8 +42,8 @@ async def test_lifespan_successful_warmup(mock_logger, mock_tts_model):
     mock_logger.info.assert_any_call("Model loaded and warmed up on cuda")
     mock_logger.info.assert_any_call("3 voice packs loaded successfully")
 
-    # Verify model initialization was called
-    mock_tts_model.initialize.assert_called_once()
+    # Verify model setup was called
+    mock_tts_model.setup.assert_called_once()
 
     # Clean up
     await async_gen.__aexit__(None, None, None)
@@ -56,14 +54,14 @@ async def test_lifespan_successful_warmup(mock_logger, mock_tts_model):
 @patch("api.src.main.logger")
 async def test_lifespan_failed_warmup(mock_logger, mock_tts_model):
     """Test failed model warmup in lifespan"""
-    # Mock the model initialization to fail
-    mock_tts_model.initialize.side_effect = Exception("Failed to initialize model")
+    # Mock the model setup to fail
+    mock_tts_model.setup.side_effect = RuntimeError("Failed to initialize model")
 
     # Create an async generator from the lifespan context manager
     async_gen = lifespan(MagicMock())
 
     # Verify the exception is raised
-    with pytest.raises(Exception, match="Failed to initialize model"):
+    with pytest.raises(RuntimeError, match="Failed to initialize model"):
         await async_gen.__aenter__()
 
     # Verify the expected logging sequence
@@ -77,20 +75,18 @@ async def test_lifespan_failed_warmup(mock_logger, mock_tts_model):
 @patch("api.src.main.TTSModel")
 async def test_lifespan_cuda_warmup(mock_tts_model):
     """Test model warmup specifically on CUDA"""
-    # Mock the model initialization with CUDA and voicepacks
-    mock_model = MagicMock()
     # Mock file system for voice counting
     mock_tts_model.VOICES_DIR = "/mock/voices"
     with patch("os.listdir", return_value=["voice1.pt", "voice2.pt"]):
-        mock_tts_model.initialize.return_value = (mock_model, 2)  # 2 voice files
-        mock_tts_model._device = "cuda"  # Set device class variable
+        mock_tts_model.setup.return_value = 2  # 2 voice files
+        mock_tts_model.get_device.return_value = "cuda"
 
     # Create an async generator from the lifespan context manager
     async_gen = lifespan(MagicMock())
     await async_gen.__aenter__()
 
-    # Verify model was initialized
-    mock_tts_model.initialize.assert_called_once()
+    # Verify model setup was called
+    mock_tts_model.setup.assert_called_once()
 
     # Clean up
     await async_gen.__aexit__(None, None, None)
@@ -100,22 +96,20 @@ async def test_lifespan_cuda_warmup(mock_tts_model):
 @patch("api.src.main.TTSModel")
 async def test_lifespan_cpu_fallback(mock_tts_model):
     """Test model warmup falling back to CPU"""
-    # Mock the model initialization with CPU and voicepacks
-    mock_model = MagicMock()
     # Mock file system for voice counting
     mock_tts_model.VOICES_DIR = "/mock/voices"
     with patch(
         "os.listdir", return_value=["voice1.pt", "voice2.pt", "voice3.pt", "voice4.pt"]
     ):
-        mock_tts_model.initialize.return_value = (mock_model, 4)  # 4 voice files
-        mock_tts_model._device = "cpu"  # Set device class variable
+        mock_tts_model.setup.return_value = 4  # 4 voice files
+        mock_tts_model.get_device.return_value = "cpu"
 
     # Create an async generator from the lifespan context manager
     async_gen = lifespan(MagicMock())
     await async_gen.__aenter__()
 
-    # Verify model was initialized
-    mock_tts_model.initialize.assert_called_once()
+    # Verify model setup was called
+    mock_tts_model.setup.assert_called_once()
 
     # Clean up
     await async_gen.__aexit__(None, None, None)
diff --git a/api/tests/test_tts_implementations.py b/api/tests/test_tts_implementations.py
new file mode 100644
index 0000000..3f10c17
--- /dev/null
+++ b/api/tests/test_tts_implementations.py
@@ -0,0 +1,144 @@
+"""Tests for TTS model implementations"""
+import os
+import torch
+import pytest
+import numpy as np
+from unittest.mock import patch, MagicMock
+
+from api.src.services.tts_base import TTSBaseModel
+from api.src.services.tts_cpu import TTSCPUModel
+from api.src.services.tts_gpu import TTSGPUModel, length_to_mask
+
+# Base Model Tests
+def test_get_device_error():
+    """get_device() must raise RuntimeError while the device is still unset (None)."""
+    TTSBaseModel._device = None  # force the "not initialized" state
+    with pytest.raises(RuntimeError, match="Model not initialized"):
+        TTSBaseModel.get_device()
+
+@patch('torch.cuda.is_available', return_value=True)
+@patch('os.path.exists', return_value=True)
+@patch('os.path.join', return_value="/mocked/path")
+@patch('os.listdir', return_value=["voice1.pt", "voice2.pt"])
+@patch('torch.load', return_value=torch.zeros(1))
+@patch('torch.save')
+def test_setup_cuda_available(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available):
+    """setup() selects the "cuda" device and returns 2 for two listed voice files when CUDA is available."""
+    TTSBaseModel._device = None  # start from the "not initialized" state
+
+    # Stub the abstract hooks with patch.multiple so the real class attributes
+    # are restored on exit; assigning MagicMocks directly onto TTSBaseModel
+    # would leak them into every test that runs afterwards.
+    with patch.multiple(
+        TTSBaseModel,
+        initialize=MagicMock(return_value=True),
+        process_text=MagicMock(return_value=("dummy", [1, 2, 3])),
+        generate_from_tokens=MagicMock(return_value=np.zeros(1000)),
+    ):
+        voice_count = TTSBaseModel.setup()
+
+    assert TTSBaseModel._device == "cuda"
+    assert voice_count == 2
+
+@patch('torch.cuda.is_available', return_value=False)
+@patch('os.path.exists', return_value=True)
+@patch('os.path.join', return_value="/mocked/path")
+@patch('os.listdir', return_value=["voice1.pt", "voice2.pt"])
+@patch('torch.load', return_value=torch.zeros(1))
+@patch('torch.save')
+def test_setup_cuda_unavailable(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available):
+    """setup() falls back to the "cpu" device and returns 2 for two listed voice files without CUDA."""
+    TTSBaseModel._device = None  # start from the "not initialized" state
+
+    # Stub the abstract hooks with patch.multiple so the real class attributes
+    # are restored on exit; assigning MagicMocks directly onto TTSBaseModel
+    # would leak them into every test that runs afterwards.
+    with patch.multiple(
+        TTSBaseModel,
+        initialize=MagicMock(return_value=True),
+        process_text=MagicMock(return_value=("dummy", [1, 2, 3])),
+        generate_from_tokens=MagicMock(return_value=np.zeros(1000)),
+    ):
+        voice_count = TTSBaseModel.setup()
+
+    assert TTSBaseModel._device == "cpu"
+    assert voice_count == 2
+
+# CPU Model Tests
+def test_cpu_initialize_missing_model():
+    """initialize() yields None when os.path.exists reports the model file as missing."""
+    with patch('os.path.exists') as fake_exists:
+        fake_exists.return_value = False
+        assert TTSCPUModel.initialize("dummy_dir") is None
+
+def test_cpu_generate_uninitialized():
+    """Both CPU generate entry points raise RuntimeError when no ONNX session is set."""
+    TTSCPUModel._onnx_session = None  # simulate a model that was never loaded
+    voicepack = torch.zeros(1)
+    expected_error = "ONNX model not initialized"
+    with pytest.raises(RuntimeError, match=expected_error):
+        TTSCPUModel.generate_from_text("test", voicepack, "en", 1.0)
+    with pytest.raises(RuntimeError, match=expected_error):
+        TTSCPUModel.generate_from_tokens([1, 2, 3], voicepack, 1.0)
+
+def test_cpu_process_text():
+    """process_text returns the phonemes and wraps the token ids in 0 start/end markers."""
+    phonemize_stub = patch(
+        'api.src.services.tts_cpu.phonemize', return_value="test phonemes"
+    )
+    tokenize_stub = patch('api.src.services.tts_cpu.tokenize', return_value=[1, 2, 3])
+
+    with phonemize_stub, tokenize_stub:
+        phonemes, tokens = TTSCPUModel.process_text("test", "en")
+    assert phonemes == "test phonemes"
+    assert tokens == [0, 1, 2, 3, 0]  # Should add start/end tokens
+
+# GPU Model Tests
+@patch('torch.cuda.is_available', return_value=False)
+def test_gpu_initialize_cuda_unavailable(mock_cuda_available):
+    """initialize() yields None on the GPU model when torch reports no CUDA device."""
+    TTSGPUModel._instance = None  # clear any previously stored instance
+
+    model = TTSGPUModel.initialize("dummy_dir", "dummy_path")
+
+    assert model is None
+
+def test_gpu_length_to_mask():
+    """length_to_mask is expected to mark the positions past each sequence length as True."""
+    # Call the real helper imported from tts_gpu: patching length_to_mask and
+    # then invoking the patch would only echo the mock's own return value and
+    # could never detect a regression in the implementation.
+    lengths = torch.tensor([3, 5])
+
+    # Rows are as wide as the longest length (5); True marks padded positions.
+    expected_mask = torch.tensor([
+        [False, False, False, True, True],
+        [False, False, False, False, False]
+    ])
+
+    mask = length_to_mask(lengths)
+
+    assert mask.shape == expected_mask.shape
+    assert torch.equal(mask, expected_mask)
+
+def test_gpu_generate_uninitialized():
+    """Both GPU generate entry points raise RuntimeError when no model instance is set."""
+    TTSGPUModel._instance = None  # simulate a model that was never loaded
+    voicepack = torch.zeros(1)
+    expected_error = "GPU model not initialized"
+    with pytest.raises(RuntimeError, match=expected_error):
+        TTSGPUModel.generate_from_text("test", voicepack, "en", 1.0)
+    with pytest.raises(RuntimeError, match=expected_error):
+        TTSGPUModel.generate_from_tokens([1, 2, 3], voicepack, 1.0)
+
+def test_gpu_process_text():
+    """process_text returns the phonemes and the tokenizer output unchanged on the GPU model."""
+    phonemize_stub = patch(
+        'api.src.services.tts_gpu.phonemize', return_value="test phonemes"
+    )
+    tokenize_stub = patch('api.src.services.tts_gpu.tokenize', return_value=[1, 2, 3])
+
+    with phonemize_stub, tokenize_stub:
+        phonemes, tokens = TTSGPUModel.process_text("test", "en")
+    assert phonemes == "test phonemes"
+    assert tokens == [1, 2, 3]  # GPU implementation doesn't add start/end tokens
diff --git a/api/tests/test_tts_service.py b/api/tests/test_tts_service.py
index d2a138b..4e63ff1 100644
--- a/api/tests/test_tts_service.py
+++ b/api/tests/test_tts_service.py
@@ -6,14 +6,19 @@ from unittest.mock import MagicMock, call, patch
 import numpy as np
 import torch
 import pytest
+from onnxruntime import InferenceSession
 
-from api.src.services.tts import TTSModel, TTSService
+from api.src.core.config import settings
+from api.src.services.tts_model import TTSModel
+from api.src.services.tts_service import TTSService
+from api.src.services.tts_cpu import TTSCPUModel
+from api.src.services.tts_gpu import TTSGPUModel
 
 
 @pytest.fixture
 def tts_service():
     """Create a TTSService instance for testing"""
-    return TTSService(start_worker=False)
+    return TTSService()
 
 
 @pytest.fixture
@@ -68,80 +73,143 @@ def test_list_voices(mock_join, mock_listdir, tts_service):
     assert "not_a_voice" not in voices
 
 
-@patch("api.src.services.tts.TTSModel.get_instance")
-@patch("api.src.services.tts.TTSModel.get_voicepack")
-@patch("api.src.services.tts.normalize_text")
-@patch("api.src.services.tts.phonemize")
-@patch("api.src.services.tts.tokenize")
-@patch("api.src.services.tts.generate")
-def test_generate_audio_empty_text(
-    mock_generate,
-    mock_tokenize,
-    mock_phonemize,
-    mock_normalize,
-    mock_voicepack,
-    mock_instance,
-    tts_service,
-):
-    """Test generating audio with empty text"""
-    mock_normalize.return_value = ""
+@patch("os.listdir")
+def test_list_voices_error(mock_listdir, tts_service):
+    """Test error handling in list_voices"""
+    mock_listdir.side_effect = Exception("Failed to list directory")
 
+    voices = tts_service.list_voices()
+    assert voices == []
+
+
+def mock_model_setup(cuda_available=False):
+    """Put TTSModel into a mocked, already-initialized state and return the voice count.
+
+    The model instance becomes a MagicMock whose generate() returns 24000 float32
+    zeros; the device is "cuda" when cuda_available is truthy, otherwise "cpu".
+    """
+    fake_model = MagicMock()
+    fake_model.generate.return_value = np.zeros(24000, dtype=np.float32)
+    # Overwrite whatever state earlier tests left on the class
+    TTSModel._voicepacks = {}
+    TTSModel._instance = fake_model
+    if cuda_available:
+        TTSModel._device = "cuda"
+    else:
+        TTSModel._device = "cpu"
+    return 3  # Return voice count (including af.pt)
+
+
+def test_model_initialization_cuda():
+    """With the mocked CUDA setup, get_device() reports "cuda" and three voices are counted."""
+    reported_voices = mock_model_setup(cuda_available=True)
+    active_device = TTSModel.get_device()
+
+    assert active_device == "cuda"
+    assert reported_voices == 3  # voice1.pt, voice2.pt, af.pt
+
+
+def test_model_initialization_cpu():
+    """With the mocked CPU setup, get_device() reports "cpu" and three voices are counted."""
+    reported_voices = mock_model_setup(cuda_available=False)
+    active_device = TTSModel.get_device()
+
+    assert active_device == "cpu"
+    assert reported_voices == 3  # voice1.pt, voice2.pt, af.pt
+
+
+def test_generate_audio_empty_text(tts_service):
+    """Test generating audio with empty text"""
     with pytest.raises(ValueError, match="Text is empty after preprocessing"):
         tts_service._generate_audio("", "af", 1.0)
 
 
-@patch("api.src.services.tts.TTSModel.get_instance")
+@patch("api.src.services.tts_model.TTSModel.get_instance")
+@patch("api.src.services.tts_model.TTSModel.get_device")
 @patch("os.path.exists")
-@patch("api.src.services.tts.normalize_text")
-@patch("api.src.services.tts.phonemize")
-@patch("api.src.services.tts.tokenize")
-@patch("api.src.services.tts.generate")
+@patch("kokoro.normalize_text")
+@patch("kokoro.phonemize")
+@patch("kokoro.tokenize")
+@patch("kokoro.generate")
 @patch("torch.load")
-def test_generate_audio_no_chunks(
+def test_generate_audio_phonemize_error(
     mock_torch_load,
     mock_generate,
     mock_tokenize,
     mock_phonemize,
     mock_normalize,
     mock_exists,
+    mock_get_device,
     mock_instance,
     tts_service,
 ):
-    """Test generating audio with no successful chunks"""
+    """Test handling phonemization error"""
     mock_normalize.return_value = "Test text"
-    mock_phonemize.return_value = "Test text"
-    mock_tokenize.return_value = ["test", "text"]
-    mock_generate.return_value = (None, None)
-    mock_instance.return_value = (MagicMock(), "cpu")
+    mock_phonemize.side_effect = Exception("Phonemization failed")
+    mock_instance.return_value = (mock_generate, "cpu")  # Use the same mock for consistency
+    mock_get_device.return_value = "cpu"
     mock_exists.return_value = True
-    mock_torch_load.return_value = MagicMock()
+    mock_torch_load.return_value = torch.zeros((10, 24000))
+    mock_generate.return_value = (None, None)
 
     with pytest.raises(ValueError, match="No audio chunks were generated successfully"):
         tts_service._generate_audio("Test text", "af", 1.0)
 
 
-@patch("torch.load")
-@patch("torch.save")
-@patch("torch.stack")
-@patch("torch.mean")
+@patch("api.src.services.tts_model.TTSModel.get_instance")
+@patch("api.src.services.tts_model.TTSModel.get_device")
 @patch("os.path.exists")
-def test_combine_voices(
-    mock_exists, mock_mean, mock_stack, mock_save, mock_load, tts_service
+@patch("kokoro.normalize_text")
+@patch("kokoro.phonemize")
+@patch("kokoro.tokenize")
+@patch("kokoro.generate")
+@patch("torch.load")
+def test_generate_audio_error(
+    mock_torch_load,
+    mock_generate,
+    mock_tokenize,
+    mock_phonemize,
+    mock_normalize,
+    mock_exists,
+    mock_get_device,
+    mock_instance,
+    tts_service,
 ):
-    """Test combining multiple voices"""
-    # Setup mocks
+    """Test handling generation error"""
+    mock_normalize.return_value = "Test text"
+    mock_phonemize.return_value = "Test text"
+    mock_tokenize.return_value = [1, 2]  # Return integers instead of strings
+    mock_generate.side_effect = Exception("Generation failed")
+    mock_instance.return_value = (mock_generate, "cpu")  # Use the same mock for consistency
+    mock_get_device.return_value = "cpu"
     mock_exists.return_value = True
-    mock_load.return_value = torch.tensor([1.0, 2.0])
-    mock_stack.return_value = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
-    mock_mean.return_value = torch.tensor([2.0, 3.0])
+    mock_torch_load.return_value = torch.zeros((10, 24000))
 
-    # Test combining two voices
-    result = tts_service.combine_voices(["voice1", "voice2"])
+    with pytest.raises(ValueError, match="No audio chunks were generated successfully"):
+        tts_service._generate_audio("Test text", "af", 1.0)
 
-    assert result == "voice1_voice2"
-    mock_stack.assert_called_once()
-    mock_mean.assert_called_once()
-    mock_save.assert_called_once()
+
+def test_save_audio(tts_service, sample_audio, tmp_path):
+    """_save_audio leaves a non-empty file at the requested path."""
+    wav_file = str(tmp_path / "test_output.wav")
+    tts_service._save_audio(sample_audio, wav_file)
+    assert os.path.exists(wav_file)
+    assert os.path.getsize(wav_file) > 0
+
+
+def test_combine_voices(tts_service):
+    """combine_voices(["voice1", "voice2"]) returns the name "voice1_voice2"."""
+    single_voice = torch.tensor([1.0, 2.0])
+    stacked_voices = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+    averaged_voice = torch.tensor([2.0, 3.0])
+    # Stub the torch load/stack/mean/save calls and os.path.exists for this call
+    with patch('torch.load', return_value=single_voice), \
+            patch('torch.stack', return_value=stacked_voices), \
+            patch('torch.mean', return_value=averaged_voice), \
+            patch('torch.save'), \
+            patch('os.path.exists', return_value=True):
+        combined_name = tts_service.combine_voices(["voice1", "voice2"])
+        assert combined_name == "voice1_voice2"
 
 
 def test_combine_voices_invalid_input(tts_service):
@@ -155,221 +223,17 @@ def test_combine_voices_invalid_input(tts_service):
         tts_service.combine_voices(["voice1"])
 
 
-@patch("os.makedirs")
-@patch("os.path.exists")
-@patch("os.listdir")
-@patch("torch.load")
-@patch("torch.save")
-@patch("os.path.join")
-def test_ensure_voices(
-    mock_join,
-    mock_save,
-    mock_load,
-    mock_listdir,
-    mock_exists,
-    mock_makedirs,
-    tts_service,
-):
-    """Test voice directory initialization"""
-    # Setup mocks
-    mock_exists.side_effect = [
-        True,
-        False,
-        False,
-    ]  # base_dir exists, voice files don't exist
-    mock_listdir.return_value = ["voice1.pt", "voice2.pt"]
-    mock_load.return_value = MagicMock()
-    mock_join.return_value = "/fake/path"
-
-    # Test voice directory initialization
-    tts_service._ensure_voices()
-
-    # Verify directory was created
-    mock_makedirs.assert_called_once()
-
-    # Verify voices were loaded and saved
-    assert mock_load.call_count == len(mock_listdir.return_value)
-    assert mock_save.call_count == len(mock_listdir.return_value)
-
-
-@patch("api.src.services.tts.TTSModel.get_instance")
-@patch("os.path.exists")
-@patch("api.src.services.tts.normalize_text")
-@patch("api.src.services.tts.phonemize")
-@patch("api.src.services.tts.tokenize")
-@patch("api.src.services.tts.generate")
-@patch("torch.load")
-def test_generate_audio_success(
-    mock_torch_load,
-    mock_generate,
-    mock_tokenize,
-    mock_phonemize,
-    mock_normalize,
-    mock_exists,
-    mock_instance,
-    tts_service,
-    sample_audio,
-):
-    """Test successful audio generation"""
-    mock_normalize.return_value = "Test text"
-    mock_phonemize.return_value = "Test text"
-    mock_tokenize.return_value = ["test", "text"]
-    mock_generate.return_value = (sample_audio, None)
-    mock_instance.return_value = (MagicMock(), "cpu")
-    mock_exists.return_value = True
-    mock_torch_load.return_value = MagicMock()
-
-    audio, processing_time = tts_service._generate_audio("Test text", "af", 1.0)
-    assert isinstance(audio, np.ndarray)
-    assert isinstance(processing_time, float)
-    assert len(audio) > 0
-
-
-@patch("api.src.services.tts.torch.cuda.is_available")
-@patch("api.src.services.tts.build_model")
-def test_model_initialization_cuda(mock_build_model, mock_cuda_available):
-    """Test model initialization with CUDA"""
-    mock_cuda_available.return_value = True
-    mock_model = MagicMock()
-    mock_build_model.return_value = mock_model
-
-    TTSModel._instance = None  # Reset singleton
-    model, voice_count = TTSModel.initialize()
-
-    assert TTSModel._device == "cuda"  # Check the class variable instead
-    assert model == mock_model
-    mock_build_model.assert_called_once()
-
-
-@patch("api.src.services.tts.torch.cuda.is_available")
-@patch("api.src.services.tts.build_model")
-def test_model_initialization_cpu(mock_build_model, mock_cuda_available):
-    """Test model initialization with CPU"""
-    mock_cuda_available.return_value = False
-    mock_model = MagicMock()
-    mock_build_model.return_value = mock_model
-
-    TTSModel._instance = None  # Reset singleton
-    model, voice_count = TTSModel.initialize()
-
-    assert TTSModel._device == "cpu"  # Check the class variable instead
-    assert model == mock_model
-    mock_build_model.assert_called_once()
-
-
-@patch("api.src.services.tts.TTSService._get_voice_path")
-@patch("api.src.services.tts.TTSModel.get_instance")
+@patch("api.src.services.tts_service.TTSService._get_voice_path")
+@patch("api.src.services.tts_model.TTSModel.get_instance")
 def test_voicepack_loading_error(mock_get_instance, mock_get_voice_path):
     """Test voicepack loading error handling"""
     mock_get_voice_path.return_value = None
-    mock_get_instance.return_value = (MagicMock(), "cpu")
+    mock_instance = MagicMock()
+    mock_instance.generate.return_value = np.zeros(24000, dtype=np.float32)
+    mock_get_instance.return_value = (mock_instance, "cpu")
 
     TTSModel._voicepacks = {}  # Reset voicepacks
 
-    service = TTSService(start_worker=False)
+    service = TTSService()
     with pytest.raises(ValueError, match="Voice not found: nonexistent_voice"):
         service._generate_audio("test", "nonexistent_voice", 1.0)
-
-
-@patch("api.src.services.tts.TTSModel")
-def test_save_audio(mock_tts_model, tts_service, sample_audio, tmp_path):
-    """Test saving audio to file"""
-    output_dir = os.path.join(tmp_path, "test_output")
-    os.makedirs(output_dir, exist_ok=True)
-    output_path = os.path.join(output_dir, "audio.wav")
-
-    tts_service._save_audio(sample_audio, output_path)
-
-    assert os.path.exists(output_path)
-    assert os.path.getsize(output_path) > 0
-
-
-@patch("api.src.services.tts.TTSModel.get_instance")
-@patch("os.path.exists")
-@patch("api.src.services.tts.normalize_text")
-@patch("api.src.services.tts.generate")
-@patch("torch.load")
-def test_generate_audio_without_stitching(
-    mock_torch_load,
-    mock_generate,
-    mock_normalize,
-    mock_exists,
-    mock_instance,
-    tts_service,
-    sample_audio,
-):
-    """Test generating audio without text stitching"""
-    mock_normalize.return_value = "Test text"
-    mock_generate.return_value = (sample_audio, None)
-    mock_instance.return_value = (MagicMock(), "cpu")
-    mock_exists.return_value = True
-    mock_torch_load.return_value = MagicMock()
-
-    audio, processing_time = tts_service._generate_audio(
-        "Test text", "af", 1.0, stitch_long_output=False
-    )
-    assert isinstance(audio, np.ndarray)
-    assert len(audio) > 0
-    mock_generate.assert_called_once()
-
-
-@patch("os.listdir")
-def test_list_voices_error(mock_listdir, tts_service):
-    """Test error handling in list_voices"""
-    mock_listdir.side_effect = Exception("Failed to list directory")
-
-    voices = tts_service.list_voices()
-    assert voices == []
-
-
-@patch("api.src.services.tts.TTSModel.get_instance")
-@patch("os.path.exists")
-@patch("api.src.services.tts.normalize_text")
-@patch("api.src.services.tts.phonemize")
-@patch("api.src.services.tts.tokenize")
-@patch("api.src.services.tts.generate")
-@patch("torch.load")
-def test_generate_audio_phonemize_error(
-    mock_torch_load,
-    mock_generate,
-    mock_tokenize,
-    mock_phonemize,
-    mock_normalize,
-    mock_exists,
-    mock_instance,
-    tts_service,
-):
-    """Test handling phonemization error"""
-    mock_normalize.return_value = "Test text"
-    mock_phonemize.side_effect = Exception("Phonemization failed")
-    mock_instance.return_value = (MagicMock(), "cpu")
-    mock_exists.return_value = True
-    mock_torch_load.return_value = MagicMock()
-    mock_generate.return_value = (None, None)
-
-    with pytest.raises(ValueError, match="No audio chunks were generated successfully"):
-        tts_service._generate_audio("Test text", "af", 1.0)
-
-
-@patch("api.src.services.tts.TTSModel.get_instance")
-@patch("os.path.exists")
-@patch("api.src.services.tts.normalize_text")
-@patch("api.src.services.tts.generate")
-@patch("torch.load")
-def test_generate_audio_error(
-    mock_torch_load,
-    mock_generate,
-    mock_normalize,
-    mock_exists,
-    mock_instance,
-    tts_service,
-):
-    """Test handling generation error"""
-    mock_normalize.return_value = "Test text"
-    mock_generate.side_effect = Exception("Generation failed")
-    mock_instance.return_value = (MagicMock(), "cpu")
-    mock_exists.return_value = True
-    mock_torch_load.return_value = MagicMock()
-
-    with pytest.raises(ValueError, match="No audio chunks were generated successfully"):
-        tts_service._generate_audio("Test text", "af", 1.0)
diff --git a/docker-compose.cpu.yml b/docker-compose.cpu.yml
index faea2fe..f44f2d4 100644
--- a/docker-compose.cpu.yml
+++ b/docker-compose.cpu.yml
@@ -36,6 +36,13 @@ services:
       - "8880:8880"
     environment:
       - PYTHONPATH=/app:/app/Kokoro-82M
+      # ONNX Optimization Settings for vectorized operations
+      - ONNX_NUM_THREADS=8  # Maximize core usage for vectorized ops
+      - ONNX_INTER_OP_THREADS=4  # Higher inter-op for parallel matrix operations
+      - ONNX_EXECUTION_MODE=parallel
+      - ONNX_OPTIMIZATION_LEVEL=all
+      - ONNX_MEMORY_PATTERN=true
+      - ONNX_ARENA_EXTEND_STRATEGY=kNextPowerOfTwo
     depends_on:
       model-fetcher:
         condition: service_healthy
diff --git a/examples/__init__.py b/examples/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/assorted_checks/__init__.py b/examples/assorted_checks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/assorted_checks/benchmarks/__init__.py b/examples/assorted_checks/benchmarks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py b/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py
new file mode 100644
index 0000000..385abb0
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python3
+import os
+import json
+import time
+import threading
+import queue
+import pandas as pd
+import sys
+from datetime import datetime
+
+from lib.shared_plotting import plot_system_metrics, plot_correlation
+from lib.shared_utils import (
+    get_system_metrics, save_json_results, write_benchmark_stats,
+    real_time_factor
+)
+from lib.shared_benchmark_utils import (
+    get_text_for_tokens, make_tts_request, generate_token_sizes, enc
+)
+
+class SystemMonitor:
+    """Sample system metrics on a background thread at a fixed interval."""
+
+    def __init__(self, interval=1.0):
+        self.interval = interval  # seconds between samples
+        self.metrics_queue = queue.Queue()  # hands samples from the thread to stop()
+        self.stop_event = threading.Event()
+        self.metrics_timeline = []
+        self.start_time = None
+        self.monitor_thread = None  # created by start()
+
+    def _monitor_loop(self):
+        """Collect metrics until stop_event is set (runs on the monitor thread)."""
+        while not self.stop_event.is_set():
+            metrics = get_system_metrics()
+            metrics["relative_time"] = time.time() - self.start_time
+            self.metrics_queue.put(metrics)
+            # Event.wait (unlike time.sleep) returns as soon as stop() is called.
+            self.stop_event.wait(self.interval)
+
+    def start(self):
+        """Start the monitoring thread."""
+        self.stop_event.clear()  # allow restarting after a previous stop()
+        self.start_time = time.time()
+        self.monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True)
+        self.monitor_thread.start()
+
+    def stop(self):
+        """Stop the monitoring thread and return every metrics sample collected so far."""
+        self.stop_event.set()
+        if self.monitor_thread is not None:
+            self.monitor_thread.join(timeout=2)
+        while True:  # drain whatever the thread queued
+            try:
+                self.metrics_timeline.append(self.metrics_queue.get_nowait())
+            except queue.Empty:
+                break
+        return self.metrics_timeline
+
+def main():
+    """Benchmark TTS requests over increasing token counts and save RTF stats, plots and JSON."""
+    monitor = SystemMonitor(interval=1.0)  # 1 second interval
+    # Set prefix for output files (e.g. "gpu", "cpu", "onnx", etc.)
+    prefix = "gpu"
+    # Generate token sizes
+    if 'gpu' in prefix:
+        token_sizes = generate_token_sizes(
+            max_tokens=5000, dense_step=150,
+            dense_max=1000, sparse_step=1000)
+    elif 'cpu' in prefix:
+        token_sizes = generate_token_sizes(
+            max_tokens=1000, dense_step=300,
+            dense_max=1000, sparse_step=0)
+    else:
+        token_sizes = generate_token_sizes(max_tokens=3000)
+
+    # Set up paths relative to this file
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    output_dir = os.path.join(script_dir, "output_audio")
+    output_data_dir = os.path.join(script_dir, "output_data")
+    output_plots_dir = os.path.join(script_dir, "output_plots")
+
+    # Create output directories
+    os.makedirs(output_dir, exist_ok=True)
+    os.makedirs(output_data_dir, exist_ok=True)
+    os.makedirs(output_plots_dir, exist_ok=True)
+
+    # Function to prefix filenames
+    def prefix_path(path: str, filename: str) -> str:
+        if prefix:
+            filename = f"{prefix}_{filename}"
+        return os.path.join(path, filename)
+
+    with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f:
+        text = f.read()
+
+    total_tokens = len(enc.encode(text))
+    print(f"Total tokens in file: {total_tokens}")
+    print(f"Testing sizes: {token_sizes}")
+
+    results = []
+    test_start_time = time.time()
+
+    # Start system monitoring
+    monitor.start()
+
+    for num_tokens in token_sizes:
+        chunk = get_text_for_tokens(text, num_tokens)
+        actual_tokens = len(enc.encode(chunk))
+
+        print(f"\nProcessing chunk with {actual_tokens} tokens:")
+        print(f"Text preview: {chunk[:100]}...")
+
+        processing_time, audio_length = make_tts_request(
+            chunk,
+            output_dir=output_dir,
+            prefix=prefix
+        )
+        if processing_time is None or audio_length is None:
+            print("Breaking loop due to error")
+            break
+
+        # Calculate RTF using the correct formula
+        rtf = real_time_factor(processing_time, audio_length)
+        print(f"Real-Time Factor: {rtf:.5f}")
+
+        results.append({
+            "tokens": actual_tokens,
+            "processing_time": processing_time,
+            "output_length": audio_length,
+            "rtf": rtf,
+            "elapsed_time": round(time.time() - test_start_time, 2),
+        })
+
+    df = pd.DataFrame(results)
+    if df.empty:
+        print("No data to plot")
+        return
+
+    df["tokens_per_second"] = df["tokens"] / df["processing_time"]
+
+    # Assemble benchmark stats (written to disk after system usage stats are appended)
+    stats = [
+        {
+            "title": "Benchmark Statistics (with correct RTF)",
+            "stats": {
+                "Total tokens processed": df['tokens'].sum(),
+                "Total audio generated (s)": df['output_length'].sum(),
+                "Total test duration (s)": df['elapsed_time'].max(),
+                "Average processing rate (tokens/s)": df['tokens_per_second'].mean(),
+                "Average RTF": df['rtf'].mean(),
+                "Average Real Time Speed": 1/df['rtf'].mean()
+            }
+        },
+        {
+            "title": "Per-chunk Stats",
+            "stats": {
+                "Average chunk size (tokens)": df['tokens'].mean(),
+                "Min chunk size (tokens)": df['tokens'].min(),
+                "Max chunk size (tokens)": df['tokens'].max(),
+                "Average processing time (s)": df['processing_time'].mean(),
+                "Average output length (s)": df['output_length'].mean()
+            }
+        },
+        {
+            "title": "Performance Ranges",
+            "stats": {
+                "Processing rate range (tokens/s)": f"{df['tokens_per_second'].min():.2f} - {df['tokens_per_second'].max():.2f}",
+                "RTF range": f"{df['rtf'].min():.2f}x - {df['rtf'].max():.2f}x",
+                "Real Time Speed range": f"{1/df['rtf'].max():.2f}x - {1/df['rtf'].min():.2f}x"
+            }
+        }
+    ]
+
+    # Plot Processing Time vs Token Count
+    plot_correlation(
+        df, "tokens", "processing_time",
+        "Processing Time vs Input Size",
+        "Number of Input Tokens",
+        "Processing Time (seconds)",
+        prefix_path(output_plots_dir, "processing_time_rtf.png")
+    )
+
+    # Plot RTF vs Token Count
+    plot_correlation(
+        df, "tokens", "rtf",
+        "Real-Time Factor vs Input Size",
+        "Number of Input Tokens",
+        "Real-Time Factor (processing time / audio length)",
+        prefix_path(output_plots_dir, "realtime_factor_rtf.png")
+    )
+
+    # Stop monitoring and get final metrics
+    final_metrics = monitor.stop()
+
+    # Convert metrics timeline to DataFrame for stats
+    metrics_df = pd.DataFrame(final_metrics)
+    # Add system usage stats
+    if not metrics_df.empty:
+        stats.append({
+            "title": "System Usage Statistics",
+            "stats": {
+                "Peak CPU Usage (%)": metrics_df['cpu_percent'].max(),
+                "Avg CPU Usage (%)": metrics_df['cpu_percent'].mean(),
+                "Peak RAM Usage (%)": metrics_df['ram_percent'].max(),
+                "Avg RAM Usage (%)": metrics_df['ram_percent'].mean(),
+                "Peak RAM Used (GB)": metrics_df['ram_used_gb'].max(),
+                "Avg RAM Used (GB)": metrics_df['ram_used_gb'].mean(),
+            }
+        })
+        if 'gpu_memory_used' in metrics_df:
+            stats[-1]["stats"].update({
+                "Peak GPU Memory (MB)": metrics_df['gpu_memory_used'].max(),
+                "Avg GPU Memory (MB)": metrics_df['gpu_memory_used'].mean(),
+            })
+
+    # Write the stats only now so the system usage section is included in the file
+    write_benchmark_stats(stats, prefix_path(output_data_dir, "benchmark_stats_rtf.txt"))
+
+    # Plot system metrics
+    plot_system_metrics(final_metrics, prefix_path(output_plots_dir, "system_usage_rtf.png"))
+
+    # Save final results
+    save_json_results(
+        {
+            "results": results,
+            "system_metrics": final_metrics,
+            "test_duration": time.time() - test_start_time
+        },
+        prefix_path(output_data_dir, "benchmark_results_rtf.json")
+    )
+
+    print("\nResults saved to:")
+    print(f"- {prefix_path(output_data_dir, 'benchmark_results_rtf.json')}")
+    print(f"- {prefix_path(output_data_dir, 'benchmark_stats_rtf.txt')}")
+    print(f"- {prefix_path(output_plots_dir, 'processing_time_rtf.png')}")
+    print(f"- {prefix_path(output_plots_dir, 'realtime_factor_rtf.png')}")
+    print(f"- {prefix_path(output_plots_dir, 'system_usage_rtf.png')}")
+    print(f"\nAudio files saved in {output_dir} with prefix: {prefix or '(none)'}")
+
+
+if __name__ == "__main__":  # run the benchmark only when executed as a script
+    main()
diff --git a/examples/assorted_checks/benchmarks/depr_benchmark_tts.py b/examples/assorted_checks/benchmarks/depr_benchmark_tts.py
new file mode 100644
index 0000000..989c177
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/depr_benchmark_tts.py
@@ -0,0 +1,165 @@
+import os
+import json
+import time
+import pandas as pd
+from examples.assorted_checks.lib.shared_plotting import plot_system_metrics, plot_correlation
+from examples.assorted_checks.lib.shared_utils import (
+    get_system_metrics, save_json_results, write_benchmark_stats
+)
+from examples.assorted_checks.lib.shared_benchmark_utils import (
+    get_text_for_tokens, make_tts_request, generate_token_sizes, enc
+)
+
+
+def main():
+    """Run the deprecated token-sweep TTS benchmark; optional argv[1] prefixes the output filenames."""
+    import sys
+    prefix = sys.argv[1] if len(sys.argv) > 1 else ""
+
+    # Set up paths relative to this file
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    output_dir = os.path.join(script_dir, "output_audio")
+    output_data_dir = os.path.join(script_dir, "output_data")
+    output_plots_dir = os.path.join(script_dir, "output_plots")
+
+    # Create output directories
+    os.makedirs(output_dir, exist_ok=True)
+    os.makedirs(output_data_dir, exist_ok=True)
+    os.makedirs(output_plots_dir, exist_ok=True)
+
+    # Function to prefix filenames
+    def prefix_path(path: str, filename: str) -> str:
+        if prefix:
+            filename = f"{prefix}_{filename}"
+        return os.path.join(path, filename)
+
+    # Read input text
+    with open(
+        os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8"
+    ) as f:
+        text = f.read()
+
+    # Get total tokens in file
+    total_tokens = len(enc.encode(text))
+    print(f"Total tokens in file: {total_tokens}")
+
+    # Chunk sizes (in tokens) to benchmark
+    token_sizes = generate_token_sizes(total_tokens)
+
+    print(f"Testing sizes: {token_sizes}")
+
+    # Process chunks
+    results = []
+    system_metrics = []
+    test_start_time = time.time()
+
+    for num_tokens in token_sizes:
+        # Get text slice with exact token count
+        chunk = get_text_for_tokens(text, num_tokens)
+        actual_tokens = len(enc.encode(chunk))
+
+        print(f"\nProcessing chunk with {actual_tokens} tokens:")
+        print(f"Text preview: {chunk[:100]}...")
+
+        # Collect system metrics before processing
+        system_metrics.append(get_system_metrics())
+        # Pass output_dir so the audio the final summary points at is actually saved
+        processing_time, audio_length = make_tts_request(chunk, output_dir=output_dir, prefix=prefix)
+        if processing_time is None or audio_length is None:
+            print("Breaking loop due to error")
+            break
+
+        # Collect system metrics after processing
+        system_metrics.append(get_system_metrics())
+
+        results.append(
+            {
+                "tokens": actual_tokens,
+                "processing_time": processing_time,
+                "output_length": audio_length,
+                "realtime_factor": audio_length / processing_time,
+                "elapsed_time": time.time() - test_start_time,
+            }
+        )
+
+        # Save intermediate results
+        save_json_results(
+            {"results": results, "system_metrics": system_metrics},
+            prefix_path(output_data_dir, "benchmark_results.json")
+        )
+
+    # Create DataFrame and calculate stats
+    df = pd.DataFrame(results)
+    if df.empty:
+        print("No data to plot")
+        return
+
+    # Calculate useful metrics
+    df["tokens_per_second"] = df["tokens"] / df["processing_time"]
+
+    # Write benchmark stats
+    stats = [
+        {
+            "title": "Benchmark Statistics",
+            "stats": {
+                "Total tokens processed": df['tokens'].sum(),
+                "Total audio generated (s)": df['output_length'].sum(),
+                "Total test duration (s)": df['elapsed_time'].max(),
+                "Average processing rate (tokens/s)": df['tokens_per_second'].mean(),
+                "Average realtime factor": df['realtime_factor'].mean()
+            }
+        },
+        {
+            "title": "Per-chunk Stats",
+            "stats": {
+                "Average chunk size (tokens)": df['tokens'].mean(),
+                "Min chunk size (tokens)": df['tokens'].min(),
+                "Max chunk size (tokens)": df['tokens'].max(),
+                "Average processing time (s)": df['processing_time'].mean(),
+                "Average output length (s)": df['output_length'].mean()
+            }
+        },
+        {
+            "title": "Performance Ranges",
+            "stats": {
+                "Processing rate range (tokens/s)": f"{df['tokens_per_second'].min():.2f} - {df['tokens_per_second'].max():.2f}",
+                "Realtime factor range": f"{df['realtime_factor'].min():.2f}x - {df['realtime_factor'].max():.2f}x"
+            }
+        }
+    ]
+    write_benchmark_stats(stats, prefix_path(output_data_dir, "benchmark_stats.txt"))
+
+    # Plot Processing Time vs Token Count
+    plot_correlation(
+        df, "tokens", "processing_time",
+        "Processing Time vs Input Size",
+        "Number of Input Tokens",
+        "Processing Time (seconds)",
+        prefix_path(output_plots_dir, "processing_time.png")
+    )
+
+    # Plot Realtime Factor vs Token Count
+    plot_correlation(
+        df, "tokens", "realtime_factor",
+        "Realtime Factor vs Input Size",
+        "Number of Input Tokens",
+        "Realtime Factor (output length / processing time)",
+        prefix_path(output_plots_dir, "realtime_factor.png")
+    )
+
+    # Plot system metrics
+    plot_system_metrics(system_metrics, prefix_path(output_plots_dir, "system_usage.png"))
+
+    print("\nResults saved to:")
+    print(f"- {prefix_path(output_data_dir, 'benchmark_results.json')}")
+    print(f"- {prefix_path(output_data_dir, 'benchmark_stats.txt')}")
+    print(f"- {prefix_path(output_plots_dir, 'processing_time.png')}")
+    print(f"- {prefix_path(output_plots_dir, 'realtime_factor.png')}")
+    print(f"- {prefix_path(output_plots_dir, 'system_usage.png')}")
+    # No separate GPU plot is listed: this script only hands out the three plot paths
+    # above, so nothing is ever written under a gpu_usage name.
+    print(f"\nAudio files saved in {output_dir} with prefix: {prefix or '(none)'}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/assorted_checks/benchmarks/lib/__init__.py b/examples/assorted_checks/benchmarks/lib/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py b/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py
new file mode 100644
index 0000000..c2fd1c4
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py
@@ -0,0 +1,111 @@
+"""Shared utilities specific to TTS benchmarking."""
+import time
+from typing import List, Optional, Tuple
+
+import requests
+import tiktoken
+
+from .shared_utils import get_audio_length, save_audio_file
+
+# Module-level tiktoken encoder (cl100k_base) shared by the helpers below
+enc = tiktoken.get_encoding("cl100k_base")
+
+
+def get_text_for_tokens(text: str, num_tokens: int) -> str:
+    """Return the leading portion of ``text`` covering ``num_tokens`` tokens.
+
+    Args:
+        text: Input text to slice
+        num_tokens: Number of leading tokens to keep
+
+    Returns:
+        str: ``text`` unchanged when it has fewer than ``num_tokens`` tokens,
+            otherwise the decoded first ``num_tokens`` tokens
+    """
+    token_ids = enc.encode(text)
+    fits_entirely = len(token_ids) < num_tokens
+    return text if fits_entirely else enc.decode(token_ids[:num_tokens])
+
+
+def make_tts_request(
+    text: str,
+    output_dir: Optional[str] = None,
+    timeout: int = 1800,
+    prefix: str = ""
+) -> Tuple[Optional[float], Optional[float]]:
+    """Make TTS request using OpenAI-compatible endpoint.
+
+    Args:
+        text: Input text to convert to speech
+        output_dir: Directory to save audio files. If None, audio won't be saved.
+        timeout: Request timeout in seconds
+        prefix: Optional prefix for output filenames ("<prefix>_chunk_<n>_tokens.wav")
+
+    Returns:
+        tuple: (processing_time, audio_length) in seconds, or (None, None) on error
+    """
+    try:
+        start_time = time.time()
+        response = requests.post(
+            "http://localhost:8880/v1/audio/speech",
+            json={
+                "model": "kokoro",
+                "input": text,
+                "voice": "af",
+                "response_format": "wav",
+            },
+            timeout=timeout,
+        )
+        response.raise_for_status()
+
+        processing_time = round(time.time() - start_time, 2)
+        # Calculate audio length from response content
+        audio_length = get_audio_length(response.content)
+
+        # Save the audio file if output_dir is provided
+        if output_dir:
+            token_count = len(enc.encode(text))
+            identifier = f"chunk_{token_count}_tokens"
+            # Honor the documented prefix; an empty prefix keeps the historical name
+            if prefix:
+                identifier = f"{prefix}_{identifier}"
+            output_file = save_audio_file(response.content, identifier, output_dir)
+            print(f"Saved audio to {output_file}")
+
+        return processing_time, audio_length
+
+    except requests.exceptions.RequestException as e:
+        print(f"Error making request for text: {text[:50]}... Error: {str(e)}")
+        return None, None
+    except Exception as e:
+        print(f"Error processing text: {text[:50]}... Error: {str(e)}")
+        return None, None
+
+
+def generate_token_sizes(
+    max_tokens: int,
+    dense_step: int = 100,
+    dense_max: int = 1000,
+    sparse_step: int = 1000
+) -> List[int]:
+    """Generate token sizes up to max_tokens, sampled densely at the start.
+
+    Args:
+        max_tokens: Upper bound; no returned size exceeds it
+        dense_step: Step size for dense sampling range
+        dense_max: Maximum value for dense sampling
+        sparse_step: Step size for sparse sampling range (skipped if < dense_max)
+
+    Returns:
+        list: Sorted list of token sizes (empty if max_tokens < dense_step)
+    """
+    # Dense sampling at start, capped so short inputs are not over-requested
+    dense_range = list(range(dense_step, min(dense_max, max_tokens) + 1, dense_step))
+
+    if max_tokens <= dense_max or sparse_step < dense_max:
+        return dense_range
+    # Sparse sampling for larger sizes, continuing one sparse step past dense_max
+    sparse_range = range(dense_max + sparse_step, max_tokens + 1, sparse_step)
+
+    # Combine and deduplicate
+    return sorted(set(dense_range).union(sparse_range))
diff --git a/examples/assorted_checks/benchmarks/lib/shared_plotting.py b/examples/assorted_checks/benchmarks/lib/shared_plotting.py
new file mode 100644
index 0000000..b1679df
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/lib/shared_plotting.py
@@ -0,0 +1,176 @@
+"""Shared plotting utilities for benchmarks and tests."""
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+# Shared dark-theme palette and font sizes read by every plotting helper in this module
+STYLE_CONFIG = {
+    "background_color": "#1a1a2e",
+    "primary_color": "#ff2a6d",
+    "secondary_color": "#05d9e8",
+    "grid_color": "#ffffff",
+    "text_color": "#ffffff",
+    "font_sizes": {
+        "title": 16,
+        "label": 14,
+        "tick": 12,
+        "text": 10
+    }
+}
+
+def setup_plot(fig, ax, title, xlabel=None, ylabel=None):
+    """Apply the shared dark theme to a figure/axis pair.
+
+    Args:
+        fig: matplotlib figure object
+        ax: matplotlib axis object
+        title: str, plot title
+        xlabel: str, optional x-axis label (skipped when falsy)
+        ylabel: str, optional y-axis label (skipped when falsy)
+
+    Returns:
+        tuple: the same (fig, ax), styled in place
+    """
+    sizes = STYLE_CONFIG["font_sizes"]
+    fg = STYLE_CONFIG["text_color"]
+    bg = STYLE_CONFIG["background_color"]
+    label_style = {
+        "fontsize": sizes["label"],
+        "fontweight": "medium",
+        "color": fg,
+    }
+
+    # Faint dashed grid
+    ax.grid(True, linestyle="--", alpha=0.3, color=STYLE_CONFIG["grid_color"])
+
+    # Bold title with extra padding above the axes
+    ax.set_title(title, pad=20, fontsize=sizes["title"], fontweight="bold", color=fg)
+
+    # Axis labels are only set when a label text was supplied
+    if xlabel:
+        ax.set_xlabel(xlabel, **label_style)
+    if ylabel:
+        ax.set_ylabel(ylabel, **label_style)
+
+    # Tick labels and tick marks
+    ax.tick_params(labelsize=sizes["tick"], colors=fg)
+
+    # Thin, mostly transparent frame
+    for side in ax.spines.values():
+        side.set_color(fg)
+        side.set_alpha(0.3)
+        side.set_linewidth(0.5)
+
+    # Same background for the axes area and the surrounding figure
+    ax.set_facecolor(bg)
+    fig.patch.set_facecolor(bg)
+
+    return fig, ax
+
+def plot_system_metrics(metrics_data, output_path):
+    """Plot CPU, RAM and (when sampled) GPU memory usage against elapsed time.
+
+    Each panel shows a centered rolling mean of the samples plus a dashed
+    horizontal line at the first sample's value, labelled "Baseline".
+
+    Args:
+        metrics_data: list of dicts with "timestamp", "cpu_percent" and
+            "ram_used_gb" keys, plus optionally "gpu_memory_used"
+        output_path: str, path to save the output plot
+    """
+    df = pd.DataFrame(metrics_data)
+    df["timestamp"] = pd.to_datetime(df["timestamp"])
+    elapsed_time = (df["timestamp"] - df["timestamp"].iloc[0]).dt.total_seconds()
+
+    primary = STYLE_CONFIG["primary_color"]
+    secondary = STYLE_CONFIG["secondary_color"]
+
+    # (column, title, y-label, line color, baseline color) for each panel
+    panels = [
+        ("cpu_percent", "CPU Usage Over Time", "CPU Usage (%)", primary, secondary),
+        ("ram_used_gb", "RAM Usage Over Time", "RAM Usage (GB)", secondary, primary),
+    ]
+    if "gpu_memory_used" in df.columns:
+        # GPU samples are divided by 1024 so the panel can be labelled in GB
+        df["gpu_memory_gb"] = df["gpu_memory_used"] / 1024
+        panels.append(
+            ("gpu_memory_gb", "GPU Memory Usage Over Time", "GPU Memory (GB)", primary, secondary)
+        )
+
+    plt.style.use("dark_background")
+    fig, axes = plt.subplots(len(panels), 1, figsize=(15, 5 * len(panels)))
+    fig.patch.set_facecolor(STYLE_CONFIG["background_color"])
+
+    # Smoothing window: at most 5 samples, but never 0 -- with a single sample
+    # len(df) // 2 is 0 and a zero-width window has nothing to average
+    window = max(1, min(5, len(df) // 2))
+
+    for ax, (column, title, ylabel, line_color, baseline_color) in zip(axes, panels):
+        _draw_usage_panel(fig, ax, elapsed_time, df[column], window,
+                          title, ylabel, line_color, baseline_color)
+
+    plt.tight_layout()
+    plt.savefig(output_path, dpi=300, bbox_inches="tight")
+    plt.close()
+
+def _draw_usage_panel(fig, ax, elapsed_time, series, window,
+                      title, ylabel, line_color, baseline_color):
+    """Draw one smoothed usage curve with its first-sample baseline on ``ax``.
+
+    Args:
+        fig: matplotlib figure that owns ``ax``
+        ax: matplotlib axis to draw on
+        elapsed_time: x values, seconds since the first sample
+        series: pandas Series of raw samples for this metric
+        window: rolling-mean window size in samples
+        title: str, panel title
+        ylabel: str, y-axis label
+        line_color: color of the smoothed curve
+        baseline_color: color of the dashed baseline
+    """
+    smoothed = series.rolling(window=window, center=True).mean()
+    sns.lineplot(x=elapsed_time, y=smoothed, ax=ax, color=line_color, linewidth=2)
+    ax.axhline(y=series.iloc[0], color=baseline_color,
+               linestyle="--", alpha=0.5, label="Baseline")
+    setup_plot(fig, ax, title, xlabel="Time (seconds)", ylabel=ylabel)
+    ax.set_ylim(0, max(series) * 1.1)
+    ax.legend()
+
+def plot_correlation(df, x, y, title, xlabel, ylabel, output_path):
+    """Save a scatter plot of two columns with a fitted line and their correlation.
+
+    Args:
+        df: pandas DataFrame containing the data
+        x: str, column name for x-axis
+        y: str, column name for y-axis
+        title: str, plot title
+        xlabel: str, x-axis label
+        ylabel: str, y-axis label
+        output_path: str, path to save the output plot
+    """
+    plt.style.use("dark_background")
+
+    fig, ax = plt.subplots(figsize=(12, 8))
+
+    # Raw observations
+    point_color = STYLE_CONFIG["primary_color"]
+    sns.scatterplot(data=df, x=x, y=y, s=100, alpha=0.6, color=point_color)
+
+    # Fitted regression line only (points are already drawn above)
+    fit_color = STYLE_CONFIG["secondary_color"]
+    sns.regplot(data=df, x=x, y=y, scatter=False, color=fit_color, line_kws={"linewidth": 2})
+
+    # Correlation coefficient, boxed in the top-left corner (axes coordinates)
+    corr = df[x].corr(df[y])
+    label_box = dict(facecolor=STYLE_CONFIG["background_color"],
+                     edgecolor=STYLE_CONFIG["text_color"],
+                     alpha=0.7)
+    plt.text(0.05, 0.95, f"Correlation: {corr:.2f}",
+             transform=ax.transAxes,
+             fontsize=STYLE_CONFIG["font_sizes"]["text"],
+             color=STYLE_CONFIG["text_color"],
+             bbox=label_box)
+
+    setup_plot(fig, ax, title, xlabel=xlabel, ylabel=ylabel)
+    plt.savefig(output_path, dpi=300, bbox_inches="tight")
+    plt.close()
diff --git a/examples/assorted_checks/benchmarks/lib/shared_utils.py b/examples/assorted_checks/benchmarks/lib/shared_utils.py
new file mode 100644
index 0000000..a9c872e
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/lib/shared_utils.py
@@ -0,0 +1,174 @@
+"""Shared utilities for benchmarks and tests."""
+import os
+import json
+import subprocess
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Union
+
+import psutil
+import scipy.io.wavfile as wavfile
+
+# True only when torch imports AND torch.cuda.is_available() is true (checked once at import)
+TORCH_AVAILABLE = False
+try:
+    import torch
+    TORCH_AVAILABLE = torch.cuda.is_available()
+except ImportError:
+    pass
+
+
+def get_audio_length(audio_data: bytes, temp_dir: Optional[str] = None) -> float:
+    """Get audio length in seconds from WAV bytes data.
+
+    The bytes go through a uniquely named scratch file, so concurrent callers
+    cannot overwrite each other; the file is always removed afterwards.
+
+    Args:
+        audio_data: Raw WAV file bytes
+        temp_dir: Directory for temporary file. If None, uses system temp directory.
+
+    Returns:
+        float: Audio length in seconds
+    """
+    import tempfile
+    if temp_dir is None:
+        temp_dir = tempfile.gettempdir()
+    os.makedirs(temp_dir, exist_ok=True)
+    fd, temp_path = tempfile.mkstemp(suffix=".wav", dir=temp_dir)
+    try:
+        with os.fdopen(fd, "wb") as f:
+            f.write(audio_data)
+        rate, data = wavfile.read(temp_path)
+        return len(data) / rate
+    finally:
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+
+
+def get_gpu_memory(average: bool = True) -> Optional[Union[float, List[float]]]:
+    """Get GPU memory usage using PyTorch if available, falling back to nvidia-smi.
+
+    Args:
+        average: If True and multiple GPUs present, returns average memory usage.
+                If False, returns list of memory usage per GPU.
+
+    Returns:
+        float or List[float] or None: GPU memory usage in MB. Returns None if no
+        GPU reading could be obtained. If average=False and multiple GPUs are
+        present, returns list of values (a single GPU still yields a float).
+    """
+    if TORCH_AVAILABLE:
+        memory_values = [
+            torch.cuda.memory_allocated(i) / 1024**2  # Convert to MB
+            for i in range(torch.cuda.device_count())
+        ]
+    else:
+        # Fall back to nvidia-smi, parsed as one used-memory value per output line
+        try:
+            result = subprocess.check_output(
+                ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,nounits,noheader"]
+            )
+            lines = result.decode("utf-8").split("\n")
+            memory_values = [float(x.strip()) for x in lines if x.strip()]
+        except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
+            return None
+
+    if not memory_values:
+        return None
+    if average:
+        return sum(memory_values) / len(memory_values)
+    return memory_values if len(memory_values) > 1 else memory_values[0]
+
+
+def get_system_metrics() -> Dict[str, Union[str, float]]:
+    """Get current system metrics including CPU, RAM, and GPU if available.
+
+    Returns:
+        dict: System metrics including timestamp, CPU%, RAM%, RAM GB, and GPU MB if available
+    """
+    # Per-core percentages averaged into one figure. NOTE(review): psutil says the first interval-less call returns 0.0 -- confirm
+    cpu_percentages = psutil.cpu_percent(percpu=True)
+    avg_cpu = sum(cpu_percentages) / len(cpu_percentages)
+    # One snapshot, so the percentage and the absolute figure describe the same instant
+    memory = psutil.virtual_memory()
+    metrics = {
+        "timestamp": datetime.now().isoformat(),
+        "cpu_percent": round(avg_cpu, 2),
+        "ram_percent": memory.percent,
+        "ram_used_gb": memory.used / (1024**3),
+    }
+
+    gpu_mem = get_gpu_memory(average=True)  # Use average for system metrics
+    if gpu_mem is not None:
+        metrics["gpu_memory_used"] = round(gpu_mem, 2)
+    return metrics
+
+
+def save_audio_file(audio_data: bytes, identifier: str, output_dir: str) -> str:
+    """Write audio bytes to ``<output_dir>/<identifier>.wav``.
+
+    Args:
+        audio_data: Raw audio bytes, written unchanged
+        identifier: File name stem (e.g. token count, test name)
+        output_dir: Target directory, created if it does not exist
+
+    Returns:
+        str: Path to the saved audio file
+    """
+    os.makedirs(output_dir, exist_ok=True)
+    wav_name = f"{identifier}.wav"
+    destination = os.path.join(output_dir, wav_name)
+
+    with open(destination, "wb") as wav_file:
+        wav_file.write(audio_data)
+    return destination
+
+
+def write_benchmark_stats(stats: List[Dict[str, Any]], output_file: str) -> None:
+    """Write benchmark statistics to a file in a clean, organized format.
+
+    Args:
+        stats: List of sections, each a dict with a "title" and a "stats"
+            mapping of label -> value; float values are written with 2 decimals
+        output_file: Path to output file; a missing parent directory is created
+    """
+    # A bare filename has an empty dirname, and makedirs("") raises; use "." then
+    os.makedirs(os.path.dirname(output_file) or ".", exist_ok=True)
+
+    with open(output_file, "w") as f:
+        for section in stats:
+            # Write section header
+            f.write(f"=== {section['title']} ===\n\n")
+
+            for label, value in section['stats'].items():
+                # Floats get two decimals; everything else keeps its default formatting
+                shown = f"{value:.2f}" if isinstance(value, float) else value
+                f.write(f"{label}: {shown}\n")
+            f.write("\n")
+
+
+def save_json_results(results: Dict[str, Any], output_file: str) -> None:
+    """Save benchmark results to a JSON file with 2-space indentation.
+
+    Args:
+        results: Dictionary of results to save
+        output_file: Path to output file; a missing parent directory is created
+    """
+    os.makedirs(os.path.dirname(output_file) or ".", exist_ok=True)  # "" dirname (bare filename) would raise
+    with open(output_file, "w") as f:
+        json.dump(results, f, indent=2)
+
+
+def real_time_factor(processing_time: float, audio_length: float, decimals: int = 2) -> float:
+    """Return the rounded Real-Time Factor: processing time per unit of audio.
+
+    Args:
+        processing_time: Time taken to process/generate audio
+        audio_length: Length of the generated audio
+        decimals: Number of decimal places to round to
+
+    Returns:
+        float: processing_time / audio_length, rounded to ``decimals`` places
+    """
+    # Values below 1.0 mean the audio was produced faster than it plays back
+    return round(processing_time / audio_length, decimals)
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results.json
new file mode 100644
index 0000000..5c60933
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/output_data/benchmark_results.json
@@ -0,0 +1,111 @@
+{
+  "results": [
+    {
+      "tokens": 100,
+      "processing_time": 18.833295583724976,
+      "output_length": 31.15,
+      "realtime_factor": 1.6539856161403135,
+      "elapsed_time": 19.024322748184204
+    },
+    {
+      "tokens": 200,
+      "processing_time": 38.95506024360657,
+      "output_length": 62.6,
+      "realtime_factor": 1.6069799304257042,
+      "elapsed_time": 58.21527123451233
+    },
+    {
+      "tokens": 300,
+      "processing_time": 49.74252939224243,
+      "output_length": 96.325,
+      "realtime_factor": 1.9364716908630366,
+      "elapsed_time": 108.19673728942871
+    },
+    {
+      "tokens": 400,
+      "processing_time": 61.349056243896484,
+      "output_length": 128.575,
+      "realtime_factor": 2.095794261102292,
+      "elapsed_time": 169.733656167984
+    },
+    {
+      "tokens": 500,
+      "processing_time": 82.86568236351013,
+      "output_length": 158.575,
+      "realtime_factor": 1.9136389815071193,
+      "elapsed_time": 252.7968451976776
+    }
+  ],
+  "system_metrics": [
+    {
+      "timestamp": "2025-01-03T00:13:49.865330",
+      "cpu_percent": 8.0,
+      "ram_percent": 39.4,
+      "ram_used_gb": 25.03811264038086,
+      "gpu_memory_used": 1204.0
+    },
+    {
+      "timestamp": "2025-01-03T00:14:08.781551",
+      "cpu_percent": 26.8,
+      "ram_percent": 42.6,
+      "ram_used_gb": 27.090862274169922,
+      "gpu_memory_used": 1225.0
+    },
+    {
+      "timestamp": "2025-01-03T00:14:08.916973",
+      "cpu_percent": 16.1,
+      "ram_percent": 42.6,
+      "ram_used_gb": 27.089553833007812,
+      "gpu_memory_used": 1225.0
+    },
+    {
+      "timestamp": "2025-01-03T00:14:47.979053",
+      "cpu_percent": 31.5,
+      "ram_percent": 43.6,
+      "ram_used_gb": 27.714427947998047,
+      "gpu_memory_used": 1225.0
+    },
+    {
+      "timestamp": "2025-01-03T00:14:48.098976",
+      "cpu_percent": 20.0,
+      "ram_percent": 43.6,
+      "ram_used_gb": 27.704315185546875,
+      "gpu_memory_used": 1211.0
+    },
+    {
+      "timestamp": "2025-01-03T00:15:37.944729",
+      "cpu_percent": 29.7,
+      "ram_percent": 38.6,
+      "ram_used_gb": 24.53925323486328,
+      "gpu_memory_used": 1217.0
+    },
+    {
+      "timestamp": "2025-01-03T00:15:38.071915",
+      "cpu_percent": 8.6,
+      "ram_percent": 38.5,
+      "ram_used_gb": 24.51690673828125,
+      "gpu_memory_used": 1208.0
+    },
+    {
+      "timestamp": "2025-01-03T00:16:39.525449",
+      "cpu_percent": 23.4,
+      "ram_percent": 38.8,
+      "ram_used_gb": 24.71230697631836,
+      "gpu_memory_used": 1221.0
+    },
+    {
+      "timestamp": "2025-01-03T00:16:39.612442",
+      "cpu_percent": 5.5,
+      "ram_percent": 38.9,
+      "ram_used_gb": 24.72066879272461,
+      "gpu_memory_used": 1221.0
+    },
+    {
+      "timestamp": "2025-01-03T00:18:02.569076",
+      "cpu_percent": 27.4,
+      "ram_percent": 39.1,
+      "ram_used_gb": 24.868202209472656,
+      "gpu_memory_used": 1264.0
+    }
+  ]
+}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json
new file mode 100644
index 0000000..52f8f04
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json
@@ -0,0 +1,216 @@
+{
+  "results": [
+    {
+      "tokens": 100,
+      "processing_time": 14.349808931350708,
+      "output_length": 31.15,
+      "rtf": 0.46,
+      "elapsed_time": 14.716031074523926
+    },
+    {
+      "tokens": 200,
+      "processing_time": 28.341803312301636,
+      "output_length": 62.6,
+      "rtf": 0.45,
+      "elapsed_time": 43.44207406044006
+    },
+    {
+      "tokens": 300,
+      "processing_time": 43.352553606033325,
+      "output_length": 96.325,
+      "rtf": 0.45,
+      "elapsed_time": 87.26906609535217
+    },
+    {
+      "tokens": 400,
+      "processing_time": 71.02449822425842,
+      "output_length": 128.575,
+      "rtf": 0.55,
+      "elapsed_time": 158.7198133468628
+    },
+    {
+      "tokens": 500,
+      "processing_time": 70.92521691322327,
+      "output_length": 158.575,
+      "rtf": 0.45,
+      "elapsed_time": 230.01379895210266
+    },
+    {
+      "tokens": 600,
+      "processing_time": 83.6328592300415,
+      "output_length": 189.25,
+      "rtf": 0.44,
+      "elapsed_time": 314.02610969543457
+    },
+    {
+      "tokens": 700,
+      "processing_time": 103.0810194015503,
+      "output_length": 222.075,
+      "rtf": 0.46,
+      "elapsed_time": 417.5678551197052
+    },
+    {
+      "tokens": 800,
+      "processing_time": 127.02162909507751,
+      "output_length": 253.85,
+      "rtf": 0.5,
+      "elapsed_time": 545.0128681659698
+    },
+    {
+      "tokens": 900,
+      "processing_time": 130.49781227111816,
+      "output_length": 283.775,
+      "rtf": 0.46,
+      "elapsed_time": 675.8943417072296
+    },
+    {
+      "tokens": 1000,
+      "processing_time": 154.76425909996033,
+      "output_length": 315.475,
+      "rtf": 0.49,
+      "elapsed_time": 831.0677945613861
+    }
+  ],
+  "system_metrics": [
+    {
+      "timestamp": "2025-01-03T00:23:52.896889",
+      "cpu_percent": 4.5,
+      "ram_percent": 39.1,
+      "ram_used_gb": 24.86032485961914,
+      "gpu_memory_used": 1281.0
+    },
+    {
+      "timestamp": "2025-01-03T00:24:07.429461",
+      "cpu_percent": 4.5,
+      "ram_percent": 39.1,
+      "ram_used_gb": 24.847564697265625,
+      "gpu_memory_used": 1285.0
+    },
+    {
+      "timestamp": "2025-01-03T00:24:07.620587",
+      "cpu_percent": 2.7,
+      "ram_percent": 39.1,
+      "ram_used_gb": 24.846607208251953,
+      "gpu_memory_used": 1275.0
+    },
+    {
+      "timestamp": "2025-01-03T00:24:36.140754",
+      "cpu_percent": 5.4,
+      "ram_percent": 39.1,
+      "ram_used_gb": 24.857810974121094,
+      "gpu_memory_used": 1267.0
+    },
+    {
+      "timestamp": "2025-01-03T00:24:36.340675",
+      "cpu_percent": 6.2,
+      "ram_percent": 39.1,
+      "ram_used_gb": 24.85773468017578,
+      "gpu_memory_used": 1267.0
+    },
+    {
+      "timestamp": "2025-01-03T00:25:19.905634",
+      "cpu_percent": 29.1,
+      "ram_percent": 39.2,
+      "ram_used_gb": 24.920318603515625,
+      "gpu_memory_used": 1256.0
+    },
+    {
+      "timestamp": "2025-01-03T00:25:20.182219",
+      "cpu_percent": 20.0,
+      "ram_percent": 39.2,
+      "ram_used_gb": 24.930198669433594,
+      "gpu_memory_used": 1256.0
+    },
+    {
+      "timestamp": "2025-01-03T00:26:31.414760",
+      "cpu_percent": 5.3,
+      "ram_percent": 39.5,
+      "ram_used_gb": 25.127891540527344,
+      "gpu_memory_used": 1259.0
+    },
+    {
+      "timestamp": "2025-01-03T00:26:31.617256",
+      "cpu_percent": 3.6,
+      "ram_percent": 39.5,
+      "ram_used_gb": 25.126346588134766,
+      "gpu_memory_used": 1252.0
+    },
+    {
+      "timestamp": "2025-01-03T00:27:42.736097",
+      "cpu_percent": 10.5,
+      "ram_percent": 39.5,
+      "ram_used_gb": 25.100231170654297,
+      "gpu_memory_used": 1249.0
+    },
+    {
+      "timestamp": "2025-01-03T00:27:42.912870",
+      "cpu_percent": 5.3,
+      "ram_percent": 39.5,
+      "ram_used_gb": 25.098285675048828,
+      "gpu_memory_used": 1249.0
+    },
+    {
+      "timestamp": "2025-01-03T00:29:06.725264",
+      "cpu_percent": 8.9,
+      "ram_percent": 39.5,
+      "ram_used_gb": 25.123123168945312,
+      "gpu_memory_used": 1239.0
+    },
+    {
+      "timestamp": "2025-01-03T00:29:06.928826",
+      "cpu_percent": 5.5,
+      "ram_percent": 39.5,
+      "ram_used_gb": 25.128646850585938,
+      "gpu_memory_used": 1239.0
+    },
+    {
+      "timestamp": "2025-01-03T00:30:50.206349",
+      "cpu_percent": 49.6,
+      "ram_percent": 39.6,
+      "ram_used_gb": 25.162948608398438,
+      "gpu_memory_used": 1245.0
+    },
+    {
+      "timestamp": "2025-01-03T00:30:50.491837",
+      "cpu_percent": 14.8,
+      "ram_percent": 39.5,
+      "ram_used_gb": 25.13379669189453,
+      "gpu_memory_used": 1245.0
+    },
+    {
+      "timestamp": "2025-01-03T00:32:57.721467",
+      "cpu_percent": 6.2,
+      "ram_percent": 39.6,
+      "ram_used_gb": 25.187721252441406,
+      "gpu_memory_used": 1384.0
+    },
+    {
+      "timestamp": "2025-01-03T00:32:57.913350",
+      "cpu_percent": 3.6,
+      "ram_percent": 39.6,
+      "ram_used_gb": 25.199390411376953,
+      "gpu_memory_used": 1384.0
+    },
+    {
+      "timestamp": "2025-01-03T00:35:08.608730",
+      "cpu_percent": 6.3,
+      "ram_percent": 39.8,
+      "ram_used_gb": 25.311710357666016,
+      "gpu_memory_used": 1330.0
+    },
+    {
+      "timestamp": "2025-01-03T00:35:08.791851",
+      "cpu_percent": 5.3,
+      "ram_percent": 39.8,
+      "ram_used_gb": 25.326683044433594,
+      "gpu_memory_used": 1333.0
+    },
+    {
+      "timestamp": "2025-01-03T00:37:43.782406",
+      "cpu_percent": 6.8,
+      "ram_percent": 40.6,
+      "ram_used_gb": 25.803058624267578,
+      "gpu_memory_used": 1409.0
+    }
+  ]
+}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json
new file mode 100644
index 0000000..59ad009
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json
@@ -0,0 +1,300 @@
+{
+  "results": [
+    {
+      "tokens": 100,
+      "processing_time": 0.96,
+      "output_length": 31.1,
+      "rtf": 0.03,
+      "elapsed_time": 1.11
+    },
+    {
+      "tokens": 250,
+      "processing_time": 2.23,
+      "output_length": 77.17,
+      "rtf": 0.03,
+      "elapsed_time": 3.49
+    },
+    {
+      "tokens": 400,
+      "processing_time": 4.05,
+      "output_length": 128.05,
+      "rtf": 0.03,
+      "elapsed_time": 7.77
+    },
+    {
+      "tokens": 550,
+      "processing_time": 4.06,
+      "output_length": 171.45,
+      "rtf": 0.02,
+      "elapsed_time": 12.0
+    },
+    {
+      "tokens": 700,
+      "processing_time": 6.01,
+      "output_length": 221.6,
+      "rtf": 0.03,
+      "elapsed_time": 18.16
+    },
+    {
+      "tokens": 850,
+      "processing_time": 6.9,
+      "output_length": 269.1,
+      "rtf": 0.03,
+      "elapsed_time": 25.21
+    },
+    {
+      "tokens": 1000,
+      "processing_time": 7.65,
+      "output_length": 315.05,
+      "rtf": 0.02,
+      "elapsed_time": 33.03
+    },
+    {
+      "tokens": 6000,
+      "processing_time": 48.7,
+      "output_length": 1837.1,
+      "rtf": 0.03,
+      "elapsed_time": 82.21
+    },
+    {
+      "tokens": 11000,
+      "processing_time": 92.44,
+      "output_length": 3388.57,
+      "rtf": 0.03,
+      "elapsed_time": 175.46
+    },
+    {
+      "tokens": 16000,
+      "processing_time": 163.61,
+      "output_length": 4977.32,
+      "rtf": 0.03,
+      "elapsed_time": 340.46
+    },
+    {
+      "tokens": 21000,
+      "processing_time": 209.72,
+      "output_length": 6533.3,
+      "rtf": 0.03,
+      "elapsed_time": 551.92
+    },
+    {
+      "tokens": 26000,
+      "processing_time": 329.35,
+      "output_length": 8068.15,
+      "rtf": 0.04,
+      "elapsed_time": 883.37
+    },
+    {
+      "tokens": 31000,
+      "processing_time": 473.52,
+      "output_length": 9611.48,
+      "rtf": 0.05,
+      "elapsed_time": 1359.28
+    },
+    {
+      "tokens": 36000,
+      "processing_time": 650.98,
+      "output_length": 11157.15,
+      "rtf": 0.06,
+      "elapsed_time": 2012.9
+    }
+  ],
+  "system_metrics": [
+    {
+      "timestamp": "2025-01-03T14:41:01.331735",
+      "cpu_percent": 7.5,
+      "ram_percent": 50.2,
+      "ram_used_gb": 31.960269927978516,
+      "gpu_memory_used": 3191.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:02.357116",
+      "cpu_percent": 17.01,
+      "ram_percent": 50.2,
+      "ram_used_gb": 31.96163558959961,
+      "gpu_memory_used": 3426.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:02.445009",
+      "cpu_percent": 9.5,
+      "ram_percent": 50.3,
+      "ram_used_gb": 31.966781616210938,
+      "gpu_memory_used": 3426.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:04.742152",
+      "cpu_percent": 18.27,
+      "ram_percent": 50.4,
+      "ram_used_gb": 32.08788299560547,
+      "gpu_memory_used": 3642.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:04.847795",
+      "cpu_percent": 16.27,
+      "ram_percent": 50.5,
+      "ram_used_gb": 32.094364166259766,
+      "gpu_memory_used": 3640.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:09.019590",
+      "cpu_percent": 15.97,
+      "ram_percent": 50.7,
+      "ram_used_gb": 32.23244094848633,
+      "gpu_memory_used": 3640.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:09.110324",
+      "cpu_percent": 3.54,
+      "ram_percent": 50.7,
+      "ram_used_gb": 32.234458923339844,
+      "gpu_memory_used": 3640.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:13.252607",
+      "cpu_percent": 13.4,
+      "ram_percent": 50.6,
+      "ram_used_gb": 32.194271087646484,
+      "gpu_memory_used": 3935.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:13.327557",
+      "cpu_percent": 4.69,
+      "ram_percent": 50.6,
+      "ram_used_gb": 32.191776275634766,
+      "gpu_memory_used": 3935.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:19.413633",
+      "cpu_percent": 12.92,
+      "ram_percent": 50.9,
+      "ram_used_gb": 32.3467903137207,
+      "gpu_memory_used": 4250.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:19.492758",
+      "cpu_percent": 7.5,
+      "ram_percent": 50.8,
+      "ram_used_gb": 32.34375,
+      "gpu_memory_used": 4250.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:26.467284",
+      "cpu_percent": 13.09,
+      "ram_percent": 51.2,
+      "ram_used_gb": 32.56281280517578,
+      "gpu_memory_used": 4249.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:26.553559",
+      "cpu_percent": 8.39,
+      "ram_percent": 51.2,
+      "ram_used_gb": 32.56183624267578,
+      "gpu_memory_used": 4249.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:34.284362",
+      "cpu_percent": 12.61,
+      "ram_percent": 51.7,
+      "ram_used_gb": 32.874778747558594,
+      "gpu_memory_used": 4250.0
+    },
+    {
+      "timestamp": "2025-01-03T14:41:34.362353",
+      "cpu_percent": 1.25,
+      "ram_percent": 51.7,
+      "ram_used_gb": 32.87461471557617,
+      "gpu_memory_used": 4250.0
+    },
+    {
+      "timestamp": "2025-01-03T14:42:23.471312",
+      "cpu_percent": 11.64,
+      "ram_percent": 54.9,
+      "ram_used_gb": 34.90264129638672,
+      "gpu_memory_used": 4647.0
+    },
+    {
+      "timestamp": "2025-01-03T14:42:23.547203",
+      "cpu_percent": 5.31,
+      "ram_percent": 54.9,
+      "ram_used_gb": 34.91563415527344,
+      "gpu_memory_used": 4647.0
+    },
+    {
+      "timestamp": "2025-01-03T14:43:56.724933",
+      "cpu_percent": 12.97,
+      "ram_percent": 59.5,
+      "ram_used_gb": 37.84241485595703,
+      "gpu_memory_used": 4655.0
+    },
+    {
+      "timestamp": "2025-01-03T14:43:56.815453",
+      "cpu_percent": 11.75,
+      "ram_percent": 59.5,
+      "ram_used_gb": 37.832679748535156,
+      "gpu_memory_used": 4655.0
+    },
+    {
+      "timestamp": "2025-01-03T14:46:41.705155",
+      "cpu_percent": 12.94,
+      "ram_percent": 66.3,
+      "ram_used_gb": 42.1534538269043,
+      "gpu_memory_used": 4729.0
+    },
+    {
+      "timestamp": "2025-01-03T14:46:41.835177",
+      "cpu_percent": 7.73,
+      "ram_percent": 66.2,
+      "ram_used_gb": 42.13554000854492,
+      "gpu_memory_used": 4729.0
+    },
+    {
+      "timestamp": "2025-01-03T14:50:13.166236",
+      "cpu_percent": 11.62,
+      "ram_percent": 73.4,
+      "ram_used_gb": 46.71288299560547,
+      "gpu_memory_used": 4676.0
+    },
+    {
+      "timestamp": "2025-01-03T14:50:13.261611",
+      "cpu_percent": 8.16,
+      "ram_percent": 73.4,
+      "ram_used_gb": 46.71356201171875,
+      "gpu_memory_used": 4676.0
+    },
+    {
+      "timestamp": "2025-01-03T14:55:44.623607",
+      "cpu_percent": 12.92,
+      "ram_percent": 82.8,
+      "ram_used_gb": 52.65533447265625,
+      "gpu_memory_used": 4636.0
+    },
+    {
+      "timestamp": "2025-01-03T14:55:44.735410",
+      "cpu_percent": 15.29,
+      "ram_percent": 82.7,
+      "ram_used_gb": 52.63290786743164,
+      "gpu_memory_used": 4636.0
+    },
+    {
+      "timestamp": "2025-01-03T15:03:40.534449",
+      "cpu_percent": 13.88,
+      "ram_percent": 85.0,
+      "ram_used_gb": 54.050071716308594,
+      "gpu_memory_used": 4771.0
+    },
+    {
+      "timestamp": "2025-01-03T15:03:40.638708",
+      "cpu_percent": 12.21,
+      "ram_percent": 85.0,
+      "ram_used_gb": 54.053733825683594,
+      "gpu_memory_used": 4771.0
+    },
+    {
+      "timestamp": "2025-01-03T15:14:34.159142",
+      "cpu_percent": 14.51,
+      "ram_percent": 78.1,
+      "ram_used_gb": 49.70396423339844,
+      "gpu_memory_used": 4739.0
+    }
+  ]
+}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt
new file mode 100644
index 0000000..010d116
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt
@@ -0,0 +1,19 @@
+=== Benchmark Statistics (with correct RTF) ===
+
+Overall Stats:
+Total tokens processed: 5500
+Total audio generated: 1741.65s
+Total test duration: 831.07s
+Average processing rate: 6.72 tokens/second
+Average RTF: 0.47x
+
+Per-chunk Stats:
+Average chunk size: 550.00 tokens
+Min chunk size: 100.00 tokens
+Max chunk size: 1000.00 tokens
+Average processing time: 82.70s
+Average output length: 174.17s
+
+Performance Ranges:
+Processing rate range: 5.63 - 7.17 tokens/second
+RTF range: 0.44x - 0.55x
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt
new file mode 100644
index 0000000..e7bed5f
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt
@@ -0,0 +1,9 @@
+=== Benchmark Statistics (with correct RTF) ===
+
+Overall Stats:
+Total tokens processed: 150850
+Total audio generated: 46786.59s
+Total test duration: 2012.90s
+Average processing rate: 104.34 tokens/second
+Average RTF: 0.03x
+
diff --git a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json
new file mode 100644
index 0000000..edcb334
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json
@@ -0,0 +1,1804 @@
+{
+  "results": [
+    {
+      "tokens": 300,
+      "processing_time": 41.62,
+      "output_length": 96.425,
+      "rtf": 0.43,
+      "elapsed_time": 41.68
+    },
+    {
+      "tokens": 600,
+      "processing_time": 81.72,
+      "output_length": 188.675,
+      "rtf": 0.43,
+      "elapsed_time": 123.49
+    },
+    {
+      "tokens": 900,
+      "processing_time": 120.55,
+      "output_length": 283.425,
+      "rtf": 0.43,
+      "elapsed_time": 244.1
+    }
+  ],
+  "system_metrics": [
+    {
+      "timestamp": "2025-01-04T01:30:26.991154",
+      "cpu_percent": 7.83,
+      "ram_percent": 48.2,
+      "ram_used_gb": 30.669906616210938,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 0.07800030708312988
+    },
+    {
+      "timestamp": "2025-01-04T01:30:28.079669",
+      "cpu_percent": 59.43,
+      "ram_percent": 48.2,
+      "ram_used_gb": 30.675106048583984,
+      "gpu_memory_used": 1244.0,
+      "relative_time": 1.1842052936553955
+    },
+    {
+      "timestamp": "2025-01-04T01:30:29.185881",
+      "cpu_percent": 57.14,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.473060607910156,
+      "gpu_memory_used": 1246.0,
+      "relative_time": 2.31345796585083
+    },
+    {
+      "timestamp": "2025-01-04T01:30:30.312825",
+      "cpu_percent": 49.54,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.49838638305664,
+      "gpu_memory_used": 1248.0,
+      "relative_time": 3.42720627784729
+    },
+    {
+      "timestamp": "2025-01-04T01:30:31.421201",
+      "cpu_percent": 47.16,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.44550323486328,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 4.517812728881836
+    },
+    {
+      "timestamp": "2025-01-04T01:30:32.514913",
+      "cpu_percent": 47.98,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.41952896118164,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 5.647390604019165
+    },
+    {
+      "timestamp": "2025-01-04T01:30:33.649021",
+      "cpu_percent": 48.55,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.400592803955078,
+      "gpu_memory_used": 1249.0,
+      "relative_time": 6.729969263076782
+    },
+    {
+      "timestamp": "2025-01-04T01:30:34.723785",
+      "cpu_percent": 43.88,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.390079498291016,
+      "gpu_memory_used": 1253.0,
+      "relative_time": 7.860571622848511
+    },
+    {
+      "timestamp": "2025-01-04T01:30:35.864707",
+      "cpu_percent": 50.01,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.380477905273438,
+      "gpu_memory_used": 1253.0,
+      "relative_time": 8.9869704246521
+    },
+    {
+      "timestamp": "2025-01-04T01:30:36.982950",
+      "cpu_percent": 49.29,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.41130828857422,
+      "gpu_memory_used": 1255.0,
+      "relative_time": 10.097310066223145
+    },
+    {
+      "timestamp": "2025-01-04T01:30:38.099505",
+      "cpu_percent": 52.99,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.410892486572266,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 11.204046249389648
+    },
+    {
+      "timestamp": "2025-01-04T01:30:39.205066",
+      "cpu_percent": 42.98,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.40534210205078,
+      "gpu_memory_used": 1253.0,
+      "relative_time": 12.306914329528809
+    },
+    {
+      "timestamp": "2025-01-04T01:30:40.305591",
+      "cpu_percent": 47.11,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.40200424194336,
+      "gpu_memory_used": 1253.0,
+      "relative_time": 13.411193370819092
+    },
+    {
+      "timestamp": "2025-01-04T01:30:41.410928",
+      "cpu_percent": 50.09,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.39764404296875,
+      "gpu_memory_used": 1260.0,
+      "relative_time": 14.534100770950317
+    },
+    {
+      "timestamp": "2025-01-04T01:30:42.530654",
+      "cpu_percent": 57.82,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.39893341064453,
+      "gpu_memory_used": 1256.0,
+      "relative_time": 15.66111135482788
+    },
+    {
+      "timestamp": "2025-01-04T01:30:43.666031",
+      "cpu_percent": 52.61,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.37706756591797,
+      "gpu_memory_used": 1256.0,
+      "relative_time": 16.79327368736267
+    },
+    {
+      "timestamp": "2025-01-04T01:30:44.794904",
+      "cpu_percent": 57.14,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.36868667602539,
+      "gpu_memory_used": 1256.0,
+      "relative_time": 17.861677646636963
+    },
+    {
+      "timestamp": "2025-01-04T01:30:45.865891",
+      "cpu_percent": 66.7,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.371902465820312,
+      "gpu_memory_used": 1257.0,
+      "relative_time": 18.96451497077942
+    },
+    {
+      "timestamp": "2025-01-04T01:30:46.971206",
+      "cpu_percent": 53.61,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.352508544921875,
+      "gpu_memory_used": 1254.0,
+      "relative_time": 20.086195945739746
+    },
+    {
+      "timestamp": "2025-01-04T01:30:48.089632",
+      "cpu_percent": 50.26,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.349388122558594,
+      "gpu_memory_used": 1248.0,
+      "relative_time": 21.199003219604492
+    },
+    {
+      "timestamp": "2025-01-04T01:30:49.191842",
+      "cpu_percent": 48.22,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.344642639160156,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 22.322958946228027
+    },
+    {
+      "timestamp": "2025-01-04T01:30:50.324994",
+      "cpu_percent": 55.64,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.35323715209961,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 23.469967365264893
+    },
+    {
+      "timestamp": "2025-01-04T01:30:51.477231",
+      "cpu_percent": 45.68,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.35232162475586,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 24.579415798187256
+    },
+    {
+      "timestamp": "2025-01-04T01:30:52.585934",
+      "cpu_percent": 46.07,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.32147216796875,
+      "gpu_memory_used": 1244.0,
+      "relative_time": 25.71301007270813
+    },
+    {
+      "timestamp": "2025-01-04T01:30:53.707821",
+      "cpu_percent": 47.54,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.296611785888672,
+      "gpu_memory_used": 1244.0,
+      "relative_time": 26.7750301361084
+    },
+    {
+      "timestamp": "2025-01-04T01:30:54.766880",
+      "cpu_percent": 44.8,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.28769302368164,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 27.87526297569275
+    },
+    {
+      "timestamp": "2025-01-04T01:30:55.873403",
+      "cpu_percent": 48.82,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.285594940185547,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 29.00292205810547
+    },
+    {
+      "timestamp": "2025-01-04T01:30:57.003386",
+      "cpu_percent": 55.54,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.30721664428711,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 30.13248038291931
+    },
+    {
+      "timestamp": "2025-01-04T01:30:58.135723",
+      "cpu_percent": 46.97,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.319698333740234,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 31.280652046203613
+    },
+    {
+      "timestamp": "2025-01-04T01:30:59.274397",
+      "cpu_percent": 46.94,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.31420135498047,
+      "gpu_memory_used": 1239.0,
+      "relative_time": 32.39983797073364
+    },
+    {
+      "timestamp": "2025-01-04T01:31:00.405545",
+      "cpu_percent": 53.81,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.335922241210938,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 33.502938985824585
+    },
+    {
+      "timestamp": "2025-01-04T01:31:01.497496",
+      "cpu_percent": 51.0,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.325199127197266,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 34.584938526153564
+    },
+    {
+      "timestamp": "2025-01-04T01:31:02.583134",
+      "cpu_percent": 49.26,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.30097198486328,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 35.680947065353394
+    },
+    {
+      "timestamp": "2025-01-04T01:31:03.686381",
+      "cpu_percent": 48.91,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.300418853759766,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 36.786722898483276
+    },
+    {
+      "timestamp": "2025-01-04T01:31:04.786497",
+      "cpu_percent": 48.69,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.29620361328125,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 37.90794491767883
+    },
+    {
+      "timestamp": "2025-01-04T01:31:05.908563",
+      "cpu_percent": 50.43,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.29269027709961,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 39.01517176628113
+    },
+    {
+      "timestamp": "2025-01-04T01:31:07.014496",
+      "cpu_percent": 48.22,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.298015594482422,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 40.118446826934814
+    },
+    {
+      "timestamp": "2025-01-04T01:31:08.120066",
+      "cpu_percent": 47.47,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.312705993652344,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 41.22802424430847
+    },
+    {
+      "timestamp": "2025-01-04T01:31:09.225367",
+      "cpu_percent": 41.09,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.34886932373047,
+      "gpu_memory_used": 1244.0,
+      "relative_time": 42.34174656867981
+    },
+    {
+      "timestamp": "2025-01-04T01:31:10.339308",
+      "cpu_percent": 44.12,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.353790283203125,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 43.44456744194031
+    },
+    {
+      "timestamp": "2025-01-04T01:31:11.443944",
+      "cpu_percent": 48.99,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.34658432006836,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 44.53658318519592
+    },
+    {
+      "timestamp": "2025-01-04T01:31:12.533026",
+      "cpu_percent": 47.62,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.318241119384766,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 45.6171441078186
+    },
+    {
+      "timestamp": "2025-01-04T01:31:13.617044",
+      "cpu_percent": 49.3,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.318588256835938,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 46.71653604507446
+    },
+    {
+      "timestamp": "2025-01-04T01:31:14.718976",
+      "cpu_percent": 48.42,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.316349029541016,
+      "gpu_memory_used": 1239.0,
+      "relative_time": 47.80844783782959
+    },
+    {
+      "timestamp": "2025-01-04T01:31:15.805079",
+      "cpu_percent": 47.56,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.30520248413086,
+      "gpu_memory_used": 1239.0,
+      "relative_time": 48.90499949455261
+    },
+    {
+      "timestamp": "2025-01-04T01:31:16.902878",
+      "cpu_percent": 49.11,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.306812286376953,
+      "gpu_memory_used": 1232.0,
+      "relative_time": 50.034260749816895
+    },
+    {
+      "timestamp": "2025-01-04T01:31:18.035723",
+      "cpu_percent": 45.81,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.32524871826172,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 51.1371693611145
+    },
+    {
+      "timestamp": "2025-01-04T01:31:19.143169",
+      "cpu_percent": 49.94,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.323795318603516,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 52.227344274520874
+    },
+    {
+      "timestamp": "2025-01-04T01:31:20.230256",
+      "cpu_percent": 39.57,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.330493927001953,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 53.34033155441284
+    },
+    {
+      "timestamp": "2025-01-04T01:31:21.331797",
+      "cpu_percent": 44.34,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.330425262451172,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 54.45246958732605
+    },
+    {
+      "timestamp": "2025-01-04T01:31:22.450663",
+      "cpu_percent": 46.87,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.3084716796875,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 55.55728077888489
+    },
+    {
+      "timestamp": "2025-01-04T01:31:23.550691",
+      "cpu_percent": 49.88,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.309173583984375,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 56.65515089035034
+    },
+    {
+      "timestamp": "2025-01-04T01:31:24.650939",
+      "cpu_percent": 51.21,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.30620574951172,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 57.726617097854614
+    },
+    {
+      "timestamp": "2025-01-04T01:31:25.728955",
+      "cpu_percent": 45.22,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.291912078857422,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 58.82792663574219
+    },
+    {
+      "timestamp": "2025-01-04T01:31:26.829490",
+      "cpu_percent": 48.86,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.289695739746094,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 59.93786025047302
+    },
+    {
+      "timestamp": "2025-01-04T01:31:27.937071",
+      "cpu_percent": 45.69,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.302818298339844,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 61.05047869682312
+    },
+    {
+      "timestamp": "2025-01-04T01:31:29.044046",
+      "cpu_percent": 51.09,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.307464599609375,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 62.159112215042114
+    },
+    {
+      "timestamp": "2025-01-04T01:31:30.162426",
+      "cpu_percent": 47.04,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.32668685913086,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 63.249592542648315
+    },
+    {
+      "timestamp": "2025-01-04T01:31:31.251755",
+      "cpu_percent": 45.32,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.330463409423828,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 64.35896062850952
+    },
+    {
+      "timestamp": "2025-01-04T01:31:32.362284",
+      "cpu_percent": 47.2,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.314319610595703,
+      "gpu_memory_used": 1239.0,
+      "relative_time": 65.4672338962555
+    },
+    {
+      "timestamp": "2025-01-04T01:31:33.468921",
+      "cpu_percent": 48.94,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.308246612548828,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 66.5955581665039
+    },
+    {
+      "timestamp": "2025-01-04T01:31:34.594176",
+      "cpu_percent": 47.88,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.29806137084961,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 67.68029594421387
+    },
+    {
+      "timestamp": "2025-01-04T01:31:35.682260",
+      "cpu_percent": 45.92,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.299114227294922,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 68.7970290184021
+    },
+    {
+      "timestamp": "2025-01-04T01:31:36.802433",
+      "cpu_percent": 51.07,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.29195785522461,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 69.92168736457825
+    },
+    {
+      "timestamp": "2025-01-04T01:31:37.926464",
+      "cpu_percent": 47.29,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.324363708496094,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 71.05467820167542
+    },
+    {
+      "timestamp": "2025-01-04T01:31:39.059936",
+      "cpu_percent": 48.91,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.32428741455078,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 72.14405465126038
+    },
+    {
+      "timestamp": "2025-01-04T01:31:40.142859",
+      "cpu_percent": 44.66,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.33354949951172,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 73.25559496879578
+    },
+    {
+      "timestamp": "2025-01-04T01:31:41.254868",
+      "cpu_percent": 48.98,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.344337463378906,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 74.35676956176758
+    },
+    {
+      "timestamp": "2025-01-04T01:31:42.354977",
+      "cpu_percent": 50.79,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.322650909423828,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 75.43929266929626
+    },
+    {
+      "timestamp": "2025-01-04T01:31:43.432869",
+      "cpu_percent": 45.86,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.316268920898438,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 76.53794598579407
+    },
+    {
+      "timestamp": "2025-01-04T01:31:44.535917",
+      "cpu_percent": 47.22,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.308757781982422,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 77.6620762348175
+    },
+    {
+      "timestamp": "2025-01-04T01:31:45.666281",
+      "cpu_percent": 51.06,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.307342529296875,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 78.77155900001526
+    },
+    {
+      "timestamp": "2025-01-04T01:31:46.771605",
+      "cpu_percent": 47.82,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.298141479492188,
+      "gpu_memory_used": 1237.0,
+      "relative_time": 79.87201809883118
+    },
+    {
+      "timestamp": "2025-01-04T01:31:47.874817",
+      "cpu_percent": 44.51,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.322750091552734,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 80.97521829605103
+    },
+    {
+      "timestamp": "2025-01-04T01:31:48.983338",
+      "cpu_percent": 47.69,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.3226318359375,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 82.09707593917847
+    },
+    {
+      "timestamp": "2025-01-04T01:31:50.102541",
+      "cpu_percent": 42.36,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.32965087890625,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 83.20944809913635
+    },
+    {
+      "timestamp": "2025-01-04T01:31:51.204766",
+      "cpu_percent": 45.87,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.32353973388672,
+      "gpu_memory_used": 1243.0,
+      "relative_time": 84.31531429290771
+    },
+    {
+      "timestamp": "2025-01-04T01:31:52.310873",
+      "cpu_percent": 50.01,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.296016693115234,
+      "gpu_memory_used": 1247.0,
+      "relative_time": 85.4254515171051
+    },
+    {
+      "timestamp": "2025-01-04T01:31:53.429342",
+      "cpu_percent": 49.65,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.306453704833984,
+      "gpu_memory_used": 1246.0,
+      "relative_time": 86.51991653442383
+    },
+    {
+      "timestamp": "2025-01-04T01:31:54.517894",
+      "cpu_percent": 47.29,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.30263900756836,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 87.60364723205566
+    },
+    {
+      "timestamp": "2025-01-04T01:31:55.602848",
+      "cpu_percent": 47.48,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.303203582763672,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 88.68531346321106
+    },
+    {
+      "timestamp": "2025-01-04T01:31:56.677895",
+      "cpu_percent": 46.74,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.29749298095703,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 89.78639531135559
+    },
+    {
+      "timestamp": "2025-01-04T01:31:57.794084",
+      "cpu_percent": 43.92,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.313438415527344,
+      "gpu_memory_used": 1244.0,
+      "relative_time": 90.89922308921814
+    },
+    {
+      "timestamp": "2025-01-04T01:31:58.901464",
+      "cpu_percent": 48.88,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.32254409790039,
+      "gpu_memory_used": 1244.0,
+      "relative_time": 91.96823143959045
+    },
+    {
+      "timestamp": "2025-01-04T01:31:59.972227",
+      "cpu_percent": 38.89,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.32897186279297,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 93.08689904212952
+    },
+    {
+      "timestamp": "2025-01-04T01:32:01.089013",
+      "cpu_percent": 49.22,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.328304290771484,
+      "gpu_memory_used": 1250.0,
+      "relative_time": 94.20951867103577
+    },
+    {
+      "timestamp": "2025-01-04T01:32:02.202304",
+      "cpu_percent": 46.56,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.29920196533203,
+      "gpu_memory_used": 1250.0,
+      "relative_time": 95.29210877418518
+    },
+    {
+      "timestamp": "2025-01-04T01:32:03.292108",
+      "cpu_percent": 46.39,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.308143615722656,
+      "gpu_memory_used": 1250.0,
+      "relative_time": 96.40629982948303
+    },
+    {
+      "timestamp": "2025-01-04T01:32:04.402400",
+      "cpu_percent": 49.88,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.310047149658203,
+      "gpu_memory_used": 1250.0,
+      "relative_time": 97.51973557472229
+    },
+    {
+      "timestamp": "2025-01-04T01:32:05.513450",
+      "cpu_percent": 53.28,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.30374526977539,
+      "gpu_memory_used": 1249.0,
+      "relative_time": 98.62612318992615
+    },
+    {
+      "timestamp": "2025-01-04T01:32:06.631627",
+      "cpu_percent": 44.65,
+      "ram_percent": 47.6,
+      "ram_used_gb": 30.30333709716797,
+      "gpu_memory_used": 1242.0,
+      "relative_time": 99.73457670211792
+    },
+    {
+      "timestamp": "2025-01-04T01:32:07.736449",
+      "cpu_percent": 50.93,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.33118438720703,
+      "gpu_memory_used": 1242.0,
+      "relative_time": 100.85807871818542
+    },
+    {
+      "timestamp": "2025-01-04T01:32:08.860429",
+      "cpu_percent": 62.71,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.41672134399414,
+      "gpu_memory_used": 1244.0,
+      "relative_time": 102.08941197395325
+    },
+    {
+      "timestamp": "2025-01-04T01:32:10.080974",
+      "cpu_percent": 96.29,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.45757293701172,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 103.18154048919678
+    },
+    {
+      "timestamp": "2025-01-04T01:32:11.187912",
+      "cpu_percent": 49.09,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.445499420166016,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 104.30198311805725
+    },
+    {
+      "timestamp": "2025-01-04T01:32:12.306213",
+      "cpu_percent": 51.15,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.42266845703125,
+      "gpu_memory_used": 1240.0,
+      "relative_time": 105.43745422363281
+    },
+    {
+      "timestamp": "2025-01-04T01:32:13.437791",
+      "cpu_percent": 47.79,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.40296173095703,
+      "gpu_memory_used": 1236.0,
+      "relative_time": 106.55609393119812
+    },
+    {
+      "timestamp": "2025-01-04T01:32:14.548441",
+      "cpu_percent": 39.41,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.406475067138672,
+      "gpu_memory_used": 1244.0,
+      "relative_time": 107.67082047462463
+    },
+    {
+      "timestamp": "2025-01-04T01:32:15.666526",
+      "cpu_percent": 77.07,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.424407958984375,
+      "gpu_memory_used": 1247.0,
+      "relative_time": 108.7851665019989
+    },
+    {
+      "timestamp": "2025-01-04T01:32:16.780793",
+      "cpu_percent": 49.13,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.429065704345703,
+      "gpu_memory_used": 1246.0,
+      "relative_time": 109.88107633590698
+    },
+    {
+      "timestamp": "2025-01-04T01:32:17.879071",
+      "cpu_percent": 82.96,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.428447723388672,
+      "gpu_memory_used": 1281.0,
+      "relative_time": 111.02328372001648
+    },
+    {
+      "timestamp": "2025-01-04T01:32:19.026978",
+      "cpu_percent": 74.64,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.430500030517578,
+      "gpu_memory_used": 1281.0,
+      "relative_time": 112.15347504615784
+    },
+    {
+      "timestamp": "2025-01-04T01:32:20.156784",
+      "cpu_percent": 76.94,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.40774917602539,
+      "gpu_memory_used": 1274.0,
+      "relative_time": 113.31317591667175
+    },
+    {
+      "timestamp": "2025-01-04T01:32:21.310871",
+      "cpu_percent": 69.52,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.429115295410156,
+      "gpu_memory_used": 1273.0,
+      "relative_time": 114.42301273345947
+    },
+    {
+      "timestamp": "2025-01-04T01:32:22.424508",
+      "cpu_percent": 74.47,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.403045654296875,
+      "gpu_memory_used": 1274.0,
+      "relative_time": 115.52539491653442
+    },
+    {
+      "timestamp": "2025-01-04T01:32:23.525673",
+      "cpu_percent": 67.2,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.484474182128906,
+      "gpu_memory_used": 1273.0,
+      "relative_time": 116.61319661140442
+    },
+    {
+      "timestamp": "2025-01-04T01:32:24.613302",
+      "cpu_percent": 57.41,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.35879135131836,
+      "gpu_memory_used": 1270.0,
+      "relative_time": 117.72619676589966
+    },
+    {
+      "timestamp": "2025-01-04T01:32:25.730732",
+      "cpu_percent": 45.97,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.347335815429688,
+      "gpu_memory_used": 1270.0,
+      "relative_time": 118.84320116043091
+    },
+    {
+      "timestamp": "2025-01-04T01:32:26.845420",
+      "cpu_percent": 47.74,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.354007720947266,
+      "gpu_memory_used": 1265.0,
+      "relative_time": 119.96074485778809
+    },
+    {
+      "timestamp": "2025-01-04T01:32:27.964248",
+      "cpu_percent": 60.0,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.3675537109375,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 121.09439873695374
+    },
+    {
+      "timestamp": "2025-01-04T01:32:29.094542",
+      "cpu_percent": 54.46,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.367305755615234,
+      "gpu_memory_used": 1230.0,
+      "relative_time": 122.24102592468262
+    },
+    {
+      "timestamp": "2025-01-04T01:32:30.244200",
+      "cpu_percent": 56.21,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.364959716796875,
+      "gpu_memory_used": 1230.0,
+      "relative_time": 123.34450554847717
+    },
+    {
+      "timestamp": "2025-01-04T01:32:31.346103",
+      "cpu_percent": 40.66,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.420738220214844,
+      "gpu_memory_used": 1235.0,
+      "relative_time": 124.46777892112732
+    },
+    {
+      "timestamp": "2025-01-04T01:32:32.463710",
+      "cpu_percent": 51.66,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.396198272705078,
+      "gpu_memory_used": 1235.0,
+      "relative_time": 125.57916116714478
+    },
+    {
+      "timestamp": "2025-01-04T01:32:33.580811",
+      "cpu_percent": 49.68,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.40151596069336,
+      "gpu_memory_used": 1236.0,
+      "relative_time": 126.6768786907196
+    },
+    {
+      "timestamp": "2025-01-04T01:32:34.668960",
+      "cpu_percent": 49.09,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.380916595458984,
+      "gpu_memory_used": 1236.0,
+      "relative_time": 127.73568296432495
+    },
+    {
+      "timestamp": "2025-01-04T01:32:35.729484",
+      "cpu_percent": 48.53,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.385761260986328,
+      "gpu_memory_used": 1236.0,
+      "relative_time": 128.85891699790955
+    },
+    {
+      "timestamp": "2025-01-04T01:32:36.849812",
+      "cpu_percent": 52.39,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.414752960205078,
+      "gpu_memory_used": 1235.0,
+      "relative_time": 129.9150390625
+    },
+    {
+      "timestamp": "2025-01-04T01:32:37.919974",
+      "cpu_percent": 46.89,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.450218200683594,
+      "gpu_memory_used": 1235.0,
+      "relative_time": 131.00502228736877
+    },
+    {
+      "timestamp": "2025-01-04T01:32:39.008115",
+      "cpu_percent": 46.59,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.436458587646484,
+      "gpu_memory_used": 1235.0,
+      "relative_time": 132.10191130638123
+    },
+    {
+      "timestamp": "2025-01-04T01:32:40.095463",
+      "cpu_percent": 45.76,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.443893432617188,
+      "gpu_memory_used": 1224.0,
+      "relative_time": 133.26839780807495
+    },
+    {
+      "timestamp": "2025-01-04T01:32:41.265737",
+      "cpu_percent": 56.94,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.41216278076172,
+      "gpu_memory_used": 1224.0,
+      "relative_time": 134.32926607131958
+    },
+    {
+      "timestamp": "2025-01-04T01:32:42.321015",
+      "cpu_percent": 40.36,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.386669158935547,
+      "gpu_memory_used": 1224.0,
+      "relative_time": 135.40537309646606
+    },
+    {
+      "timestamp": "2025-01-04T01:32:43.400382",
+      "cpu_percent": 44.51,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.39049530029297,
+      "gpu_memory_used": 1224.0,
+      "relative_time": 136.52469301223755
+    },
+    {
+      "timestamp": "2025-01-04T01:32:44.524119",
+      "cpu_percent": 50.29,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.38903045654297,
+      "gpu_memory_used": 1220.0,
+      "relative_time": 137.60522270202637
+    },
+    {
+      "timestamp": "2025-01-04T01:32:45.599869",
+      "cpu_percent": 51.69,
+      "ram_percent": 47.8,
+      "ram_used_gb": 30.378681182861328,
+      "gpu_memory_used": 1213.0,
+      "relative_time": 138.7130560874939
+    },
+    {
+      "timestamp": "2025-01-04T01:32:46.711674",
+      "cpu_percent": 49.55,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.34076690673828,
+      "gpu_memory_used": 1213.0,
+      "relative_time": 139.8105547428131
+    },
+    {
+      "timestamp": "2025-01-04T01:32:47.813091",
+      "cpu_percent": 44.5,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.343746185302734,
+      "gpu_memory_used": 1213.0,
+      "relative_time": 140.91643166542053
+    },
+    {
+      "timestamp": "2025-01-04T01:32:48.917679",
+      "cpu_percent": 43.76,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.354793548583984,
+      "gpu_memory_used": 1213.0,
+      "relative_time": 142.04264283180237
+    },
+    {
+      "timestamp": "2025-01-04T01:32:50.047653",
+      "cpu_percent": 48.41,
+      "ram_percent": 47.7,
+      "ram_used_gb": 30.361080169677734,
+      "gpu_memory_used": 1219.0,
+      "relative_time": 143.14667677879333
+    },
+    {
+      "timestamp": "2025-01-04T01:32:51.153490",
+      "cpu_percent": 57.01,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.4390869140625,
+      "gpu_memory_used": 1232.0,
+      "relative_time": 144.2709481716156
+    },
+    {
+      "timestamp": "2025-01-04T01:32:52.272196",
+      "cpu_percent": 54.69,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.46664047241211,
+      "gpu_memory_used": 1236.0,
+      "relative_time": 145.36933588981628
+    },
+    {
+      "timestamp": "2025-01-04T01:32:53.374563",
+      "cpu_percent": 51.37,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.487388610839844,
+      "gpu_memory_used": 1245.0,
+      "relative_time": 146.4400930404663
+    },
+    {
+      "timestamp": "2025-01-04T01:32:54.445178",
+      "cpu_percent": 47.76,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.475635528564453,
+      "gpu_memory_used": 1241.0,
+      "relative_time": 147.5295627117157
+    },
+    {
+      "timestamp": "2025-01-04T01:32:55.520495",
+      "cpu_percent": 49.24,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.47634506225586,
+      "gpu_memory_used": 1236.0,
+      "relative_time": 148.5926468372345
+    },
+    {
+      "timestamp": "2025-01-04T01:32:56.591995",
+      "cpu_percent": 53.63,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.49687957763672,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 149.72928547859192
+    },
+    {
+      "timestamp": "2025-01-04T01:32:57.727346",
+      "cpu_percent": 65.04,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.59111785888672,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 150.86237502098083
+    },
+    {
+      "timestamp": "2025-01-04T01:32:58.862812",
+      "cpu_percent": 71.05,
+      "ram_percent": 48.2,
+      "ram_used_gb": 30.633731842041016,
+      "gpu_memory_used": 1263.0,
+      "relative_time": 152.03348207473755
+    },
+    {
+      "timestamp": "2025-01-04T01:33:00.037915",
+      "cpu_percent": 85.87,
+      "ram_percent": 48.2,
+      "ram_used_gb": 30.68001937866211,
+      "gpu_memory_used": 1253.0,
+      "relative_time": 153.1551034450531
+    },
+    {
+      "timestamp": "2025-01-04T01:33:01.158119",
+      "cpu_percent": 59.8,
+      "ram_percent": 48.2,
+      "ram_used_gb": 30.69198989868164,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 154.2606840133667
+    },
+    {
+      "timestamp": "2025-01-04T01:33:02.262390",
+      "cpu_percent": 45.33,
+      "ram_percent": 48.3,
+      "ram_used_gb": 30.743839263916016,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 155.3663365840912
+    },
+    {
+      "timestamp": "2025-01-04T01:33:03.369936",
+      "cpu_percent": 35.41,
+      "ram_percent": 48.2,
+      "ram_used_gb": 30.68472671508789,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 156.4842345714569
+    },
+    {
+      "timestamp": "2025-01-04T01:33:04.488089",
+      "cpu_percent": 47.22,
+      "ram_percent": 48.4,
+      "ram_used_gb": 30.78485870361328,
+      "gpu_memory_used": 1254.0,
+      "relative_time": 157.58868670463562
+    },
+    {
+      "timestamp": "2025-01-04T01:33:05.592303",
+      "cpu_percent": 36.14,
+      "ram_percent": 48.5,
+      "ram_used_gb": 30.87320327758789,
+      "gpu_memory_used": 1254.0,
+      "relative_time": 158.71629786491394
+    },
+    {
+      "timestamp": "2025-01-04T01:33:06.721317",
+      "cpu_percent": 38.46,
+      "ram_percent": 48.2,
+      "ram_used_gb": 30.668170928955078,
+      "gpu_memory_used": 1254.0,
+      "relative_time": 159.82655477523804
+    },
+    {
+      "timestamp": "2025-01-04T01:33:07.827187",
+      "cpu_percent": 35.81,
+      "ram_percent": 48.4,
+      "ram_used_gb": 30.777912139892578,
+      "gpu_memory_used": 1254.0,
+      "relative_time": 160.94229197502136
+    },
+    {
+      "timestamp": "2025-01-04T01:33:08.943035",
+      "cpu_percent": 39.24,
+      "ram_percent": 48.5,
+      "ram_used_gb": 30.86941146850586,
+      "gpu_memory_used": 1254.0,
+      "relative_time": 162.06378889083862
+    },
+    {
+      "timestamp": "2025-01-04T01:33:10.063208",
+      "cpu_percent": 51.52,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.624229431152344,
+      "gpu_memory_used": 1254.0,
+      "relative_time": 163.16198420524597
+    },
+    {
+      "timestamp": "2025-01-04T01:33:11.163067",
+      "cpu_percent": 48.99,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.612281799316406,
+      "gpu_memory_used": 1254.0,
+      "relative_time": 164.26579809188843
+    },
+    {
+      "timestamp": "2025-01-04T01:33:12.266417",
+      "cpu_percent": 46.27,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.584861755371094,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 165.35981583595276
+    },
+    {
+      "timestamp": "2025-01-04T01:33:13.354673",
+      "cpu_percent": 45.71,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.582279205322266,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 166.45263361930847
+    },
+    {
+      "timestamp": "2025-01-04T01:33:14.447308",
+      "cpu_percent": 48.69,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.584793090820312,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 167.54857754707336
+    },
+    {
+      "timestamp": "2025-01-04T01:33:15.552042",
+      "cpu_percent": 48.66,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.580883026123047,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 168.659592628479
+    },
+    {
+      "timestamp": "2025-01-04T01:33:16.653015",
+      "cpu_percent": 50.37,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.573726654052734,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 169.7969992160797
+    },
+    {
+      "timestamp": "2025-01-04T01:33:17.802854",
+      "cpu_percent": 49.45,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.587318420410156,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 170.891606092453
+    },
+    {
+      "timestamp": "2025-01-04T01:33:18.893192",
+      "cpu_percent": 50.16,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.5953369140625,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 172.0133557319641
+    },
+    {
+      "timestamp": "2025-01-04T01:33:20.008593",
+      "cpu_percent": 47.57,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.6124267578125,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 173.0913679599762
+    },
+    {
+      "timestamp": "2025-01-04T01:33:21.097576",
+      "cpu_percent": 44.32,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.584686279296875,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 174.20030999183655
+    },
+    {
+      "timestamp": "2025-01-04T01:33:22.201335",
+      "cpu_percent": 49.01,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.547630310058594,
+      "gpu_memory_used": 1252.0,
+      "relative_time": 175.30235862731934
+    },
+    {
+      "timestamp": "2025-01-04T01:33:23.306131",
+      "cpu_percent": 43.7,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.559757232666016,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 176.40550017356873
+    },
+    {
+      "timestamp": "2025-01-04T01:33:24.408896",
+      "cpu_percent": 48.77,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.5601806640625,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 177.4984576702118
+    },
+    {
+      "timestamp": "2025-01-04T01:33:25.496705",
+      "cpu_percent": 50.56,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.556926727294922,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 178.58782863616943
+    },
+    {
+      "timestamp": "2025-01-04T01:33:26.588438",
+      "cpu_percent": 47.76,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.53600311279297,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 179.67969870567322
+    },
+    {
+      "timestamp": "2025-01-04T01:33:27.679807",
+      "cpu_percent": 49.0,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.540546417236328,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 180.78388810157776
+    },
+    {
+      "timestamp": "2025-01-04T01:33:28.780263",
+      "cpu_percent": 49.25,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.55233383178711,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 181.88185930252075
+    },
+    {
+      "timestamp": "2025-01-04T01:33:29.881869",
+      "cpu_percent": 47.08,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.56603240966797,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 182.9666450023651
+    },
+    {
+      "timestamp": "2025-01-04T01:33:30.957821",
+      "cpu_percent": 45.77,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.559410095214844,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 184.05338644981384
+    },
+    {
+      "timestamp": "2025-01-04T01:33:32.047377",
+      "cpu_percent": 50.79,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.534175872802734,
+      "gpu_memory_used": 1251.0,
+      "relative_time": 185.17484974861145
+    },
+    {
+      "timestamp": "2025-01-04T01:33:33.167413",
+      "cpu_percent": 52.13,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.54046630859375,
+      "gpu_memory_used": 1266.0,
+      "relative_time": 186.23550605773926
+    },
+    {
+      "timestamp": "2025-01-04T01:33:34.226743",
+      "cpu_percent": 43.81,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.54621124267578,
+      "gpu_memory_used": 1266.0,
+      "relative_time": 187.30887961387634
+    },
+    {
+      "timestamp": "2025-01-04T01:33:35.303398",
+      "cpu_percent": 49.28,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.545230865478516,
+      "gpu_memory_used": 1264.0,
+      "relative_time": 188.40410709381104
+    },
+    {
+      "timestamp": "2025-01-04T01:33:36.405660",
+      "cpu_percent": 46.44,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.540679931640625,
+      "gpu_memory_used": 1264.0,
+      "relative_time": 189.47515082359314
+    },
+    {
+      "timestamp": "2025-01-04T01:33:37.469955",
+      "cpu_percent": 41.6,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.562320709228516,
+      "gpu_memory_used": 1264.0,
+      "relative_time": 190.56309294700623
+    },
+    {
+      "timestamp": "2025-01-04T01:33:38.556728",
+      "cpu_percent": 50.52,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.561084747314453,
+      "gpu_memory_used": 1264.0,
+      "relative_time": 191.66572499275208
+    },
+    {
+      "timestamp": "2025-01-04T01:33:39.665385",
+      "cpu_percent": 40.93,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.577682495117188,
+      "gpu_memory_used": 1264.0,
+      "relative_time": 192.76011109352112
+    },
+    {
+      "timestamp": "2025-01-04T01:33:40.754482",
+      "cpu_percent": 50.46,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.5740966796875,
+      "gpu_memory_used": 1262.0,
+      "relative_time": 193.90924453735352
+    },
+    {
+      "timestamp": "2025-01-04T01:33:41.903437",
+      "cpu_percent": 52.75,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.58869171142578,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 195.0148274898529
+    },
+    {
+      "timestamp": "2025-01-04T01:33:43.008520",
+      "cpu_percent": 50.04,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.560386657714844,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 196.12349009513855
+    },
+    {
+      "timestamp": "2025-01-04T01:33:44.129194",
+      "cpu_percent": 51.56,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.572277069091797,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 197.20997285842896
+    },
+    {
+      "timestamp": "2025-01-04T01:33:45.212927",
+      "cpu_percent": 47.77,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.556873321533203,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 198.29724264144897
+    },
+    {
+      "timestamp": "2025-01-04T01:33:46.288883",
+      "cpu_percent": 46.07,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.554439544677734,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 199.39549779891968
+    },
+    {
+      "timestamp": "2025-01-04T01:33:47.403171",
+      "cpu_percent": 46.18,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.557025909423828,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 200.50221276283264
+    },
+    {
+      "timestamp": "2025-01-04T01:33:48.495515",
+      "cpu_percent": 48.09,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.558856964111328,
+      "gpu_memory_used": 1267.0,
+      "relative_time": 201.62405467033386
+    },
+    {
+      "timestamp": "2025-01-04T01:33:49.630725",
+      "cpu_percent": 53.47,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.59896469116211,
+      "gpu_memory_used": 1283.0,
+      "relative_time": 202.70162987709045
+    },
+    {
+      "timestamp": "2025-01-04T01:33:50.709226",
+      "cpu_percent": 44.74,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.581470489501953,
+      "gpu_memory_used": 1281.0,
+      "relative_time": 203.78962469100952
+    },
+    {
+      "timestamp": "2025-01-04T01:33:51.782302",
+      "cpu_percent": 43.4,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.582977294921875,
+      "gpu_memory_used": 1282.0,
+      "relative_time": 204.87054562568665
+    },
+    {
+      "timestamp": "2025-01-04T01:33:52.868020",
+      "cpu_percent": 51.75,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.540206909179688,
+      "gpu_memory_used": 1282.0,
+      "relative_time": 205.95602416992188
+    },
+    {
+      "timestamp": "2025-01-04T01:33:53.956023",
+      "cpu_percent": 46.36,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.562763214111328,
+      "gpu_memory_used": 1282.0,
+      "relative_time": 207.06639337539673
+    },
+    {
+      "timestamp": "2025-01-04T01:33:55.064043",
+      "cpu_percent": 43.91,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.560302734375,
+      "gpu_memory_used": 1277.0,
+      "relative_time": 208.16699743270874
+    },
+    {
+      "timestamp": "2025-01-04T01:33:56.170674",
+      "cpu_percent": 50.01,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.576671600341797,
+      "gpu_memory_used": 1281.0,
+      "relative_time": 209.28660559654236
+    },
+    {
+      "timestamp": "2025-01-04T01:33:57.288316",
+      "cpu_percent": 50.51,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.551471710205078,
+      "gpu_memory_used": 1280.0,
+      "relative_time": 210.4030442237854
+    },
+    {
+      "timestamp": "2025-01-04T01:33:58.407032",
+      "cpu_percent": 49.43,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.576725006103516,
+      "gpu_memory_used": 1280.0,
+      "relative_time": 211.50494027137756
+    },
+    {
+      "timestamp": "2025-01-04T01:33:59.497806",
+      "cpu_percent": 46.68,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.59314727783203,
+      "gpu_memory_used": 1279.0,
+      "relative_time": 212.6002950668335
+    },
+    {
+      "timestamp": "2025-01-04T01:34:00.598484",
+      "cpu_percent": 57.44,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.60983657836914,
+      "gpu_memory_used": 1285.0,
+      "relative_time": 213.7150914669037
+    },
+    {
+      "timestamp": "2025-01-04T01:34:01.719968",
+      "cpu_percent": 54.58,
+      "ram_percent": 48.1,
+      "ram_used_gb": 30.586456298828125,
+      "gpu_memory_used": 1283.0,
+      "relative_time": 214.80932760238647
+    },
+    {
+      "timestamp": "2025-01-04T01:34:02.807573",
+      "cpu_percent": 61.69,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.53356170654297,
+      "gpu_memory_used": 1281.0,
+      "relative_time": 215.88946890830994
+    },
+    {
+      "timestamp": "2025-01-04T01:34:03.885672",
+      "cpu_percent": 49.46,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.517364501953125,
+      "gpu_memory_used": 1283.0,
+      "relative_time": 216.97114062309265
+    },
+    {
+      "timestamp": "2025-01-04T01:34:04.974449",
+      "cpu_percent": 42.69,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.527969360351562,
+      "gpu_memory_used": 1285.0,
+      "relative_time": 218.10192775726318
+    },
+    {
+      "timestamp": "2025-01-04T01:34:06.107947",
+      "cpu_percent": 54.87,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.51028823852539,
+      "gpu_memory_used": 1273.0,
+      "relative_time": 219.17600679397583
+    },
+    {
+      "timestamp": "2025-01-04T01:34:07.172153",
+      "cpu_percent": 45.42,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.509258270263672,
+      "gpu_memory_used": 1273.0,
+      "relative_time": 220.28902983665466
+    },
+    {
+      "timestamp": "2025-01-04T01:34:08.289623",
+      "cpu_percent": 52.75,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.52011489868164,
+      "gpu_memory_used": 1272.0,
+      "relative_time": 221.39960098266602
+    },
+    {
+      "timestamp": "2025-01-04T01:34:09.406158",
+      "cpu_percent": 52.53,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.52783966064453,
+      "gpu_memory_used": 1265.0,
+      "relative_time": 222.49749565124512
+    },
+    {
+      "timestamp": "2025-01-04T01:34:10.491042",
+      "cpu_percent": 56.49,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.540733337402344,
+      "gpu_memory_used": 1261.0,
+      "relative_time": 223.5777132511139
+    },
+    {
+      "timestamp": "2025-01-04T01:34:11.577710",
+      "cpu_percent": 44.25,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.531757354736328,
+      "gpu_memory_used": 1262.0,
+      "relative_time": 224.68288159370422
+    },
+    {
+      "timestamp": "2025-01-04T01:34:12.682455",
+      "cpu_percent": 47.56,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.50157928466797,
+      "gpu_memory_used": 1262.0,
+      "relative_time": 225.78287291526794
+    },
+    {
+      "timestamp": "2025-01-04T01:34:13.782976",
+      "cpu_percent": 48.52,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.507736206054688,
+      "gpu_memory_used": 1262.0,
+      "relative_time": 226.8910207748413
+    },
+    {
+      "timestamp": "2025-01-04T01:34:14.884200",
+      "cpu_percent": 49.89,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.50653076171875,
+      "gpu_memory_used": 1263.0,
+      "relative_time": 228.04418087005615
+    },
+    {
+      "timestamp": "2025-01-04T01:34:16.051189",
+      "cpu_percent": 49.34,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.504470825195312,
+      "gpu_memory_used": 1263.0,
+      "relative_time": 229.13680815696716
+    },
+    {
+      "timestamp": "2025-01-04T01:34:17.136588",
+      "cpu_percent": 47.8,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.496841430664062,
+      "gpu_memory_used": 1263.0,
+      "relative_time": 230.26778984069824
+    },
+    {
+      "timestamp": "2025-01-04T01:34:18.269616",
+      "cpu_percent": 48.23,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.50909423828125,
+      "gpu_memory_used": 1262.0,
+      "relative_time": 231.3880865573883
+    },
+    {
+      "timestamp": "2025-01-04T01:34:19.387759",
+      "cpu_percent": 42.46,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.525142669677734,
+      "gpu_memory_used": 1262.0,
+      "relative_time": 232.4770486354828
+    },
+    {
+      "timestamp": "2025-01-04T01:34:20.471629",
+      "cpu_percent": 44.17,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.535388946533203,
+      "gpu_memory_used": 1259.0,
+      "relative_time": 233.57954168319702
+    },
+    {
+      "timestamp": "2025-01-04T01:34:21.576615",
+      "cpu_percent": 45.36,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.529708862304688,
+      "gpu_memory_used": 1259.0,
+      "relative_time": 234.70528435707092
+    },
+    {
+      "timestamp": "2025-01-04T01:34:22.709825",
+      "cpu_percent": 52.14,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.490406036376953,
+      "gpu_memory_used": 1259.0,
+      "relative_time": 235.84367108345032
+    },
+    {
+      "timestamp": "2025-01-04T01:34:23.834912",
+      "cpu_percent": 49.39,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.49042510986328,
+      "gpu_memory_used": 1259.0,
+      "relative_time": 236.94777131080627
+    },
+    {
+      "timestamp": "2025-01-04T01:34:24.940884",
+      "cpu_percent": 51.84,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.489459991455078,
+      "gpu_memory_used": 1259.0,
+      "relative_time": 238.07107305526733
+    },
+    {
+      "timestamp": "2025-01-04T01:34:26.077527",
+      "cpu_percent": 49.55,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.488842010498047,
+      "gpu_memory_used": 1259.0,
+      "relative_time": 239.20314645767212
+    },
+    {
+      "timestamp": "2025-01-04T01:34:27.199360",
+      "cpu_percent": 47.71,
+      "ram_percent": 47.9,
+      "ram_used_gb": 30.49380874633789,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 240.32860612869263
+    },
+    {
+      "timestamp": "2025-01-04T01:34:28.333600",
+      "cpu_percent": 48.61,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.503887176513672,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 241.44983053207397
+    },
+    {
+      "timestamp": "2025-01-04T01:34:29.453855",
+      "cpu_percent": 51.01,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.512046813964844,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 242.60703372955322
+    },
+    {
+      "timestamp": "2025-01-04T01:34:30.613699",
+      "cpu_percent": 53.89,
+      "ram_percent": 48.0,
+      "ram_used_gb": 30.522415161132812,
+      "gpu_memory_used": 1258.0,
+      "relative_time": 243.73219799995422
+    },
+    {
+      "timestamp": "2025-01-04T01:34:31.735503",
+      "cpu_percent": 21.25,
+      "ram_percent": 48.2,
+      "ram_used_gb": 30.68771743774414,
+      "gpu_memory_used": 1260.0,
+      "relative_time": 244.80069231987
+    }
+  ],
+  "test_duration": 247.14976453781128
+}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt
new file mode 100644
index 0000000..541a304
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt
@@ -0,0 +1,23 @@
+=== Benchmark Statistics (with correct RTF) ===
+
+Total tokens processed: 1800
+Total audio generated (s): 568.53
+Total test duration (s): 244.10
+Average processing rate (tokens/s): 7.34
+Average RTF: 0.43
+Average Real Time Speed: 2.33
+
+=== Per-chunk Stats ===
+
+Average chunk size (tokens): 600.00
+Min chunk size (tokens): 300
+Max chunk size (tokens): 900
+Average processing time (s): 81.30
+Average output length (s): 189.51
+
+=== Performance Ranges ===
+
+Processing rate range (tokens/s): 7.21 - 7.47
+RTF range: 0.43x - 0.43x
+Real Time Speed range: 2.33x - 2.33x
+
diff --git a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json
new file mode 100644
index 0000000..ccac37e
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json
@@ -0,0 +1,1253 @@
+{
+  "results": [
+    {
+      "tokens": 150,
+      "processing_time": 1.86,
+      "output_length": 45.9,
+      "rtf": 0.04,
+      "elapsed_time": 1.92
+    },
+    {
+      "tokens": 300,
+      "processing_time": 3.08,
+      "output_length": 96.425,
+      "rtf": 0.03,
+      "elapsed_time": 5.06
+    },
+    {
+      "tokens": 450,
+      "processing_time": 4.4,
+      "output_length": 143.1,
+      "rtf": 0.03,
+      "elapsed_time": 9.53
+    },
+    {
+      "tokens": 600,
+      "processing_time": 6.47,
+      "output_length": 188.675,
+      "rtf": 0.03,
+      "elapsed_time": 16.06
+    },
+    {
+      "tokens": 750,
+      "processing_time": 8.32,
+      "output_length": 236.7,
+      "rtf": 0.04,
+      "elapsed_time": 24.45
+    },
+    {
+      "tokens": 900,
+      "processing_time": 8.92,
+      "output_length": 283.425,
+      "rtf": 0.03,
+      "elapsed_time": 33.45
+    },
+    {
+      "tokens": 2000,
+      "processing_time": 18.55,
+      "output_length": 624.325,
+      "rtf": 0.03,
+      "elapsed_time": 52.14
+    },
+    {
+      "tokens": 3000,
+      "processing_time": 23.98,
+      "output_length": 931.15,
+      "rtf": 0.03,
+      "elapsed_time": 76.32
+    },
+    {
+      "tokens": 4000,
+      "processing_time": 32.93,
+      "output_length": 1222.1,
+      "rtf": 0.03,
+      "elapsed_time": 109.53
+    },
+    {
+      "tokens": 5000,
+      "processing_time": 45.39,
+      "output_length": 1524.575,
+      "rtf": 0.03,
+      "elapsed_time": 155.23
+    }
+  ],
+  "system_metrics": [
+    {
+      "timestamp": "2025-01-04T02:37:52.172368",
+      "cpu_percent": 11.51,
+      "ram_percent": 52.8,
+      "ram_used_gb": 33.61172866821289,
+      "gpu_memory_used": 3216.0,
+      "relative_time": 0.08031892776489258
+    },
+    {
+      "timestamp": "2025-01-04T02:37:53.266071",
+      "cpu_percent": 15.33,
+      "ram_percent": 52.9,
+      "ram_used_gb": 33.678314208984375,
+      "gpu_memory_used": 3392.0,
+      "relative_time": 1.1673684120178223
+    },
+    {
+      "timestamp": "2025-01-04T02:37:54.352909",
+      "cpu_percent": 15.3,
+      "ram_percent": 53.0,
+      "ram_used_gb": 33.712764739990234,
+      "gpu_memory_used": 3667.0,
+      "relative_time": 2.253591537475586
+    },
+    {
+      "timestamp": "2025-01-04T02:37:55.439413",
+      "cpu_percent": 23.51,
+      "ram_percent": 52.7,
+      "ram_used_gb": 33.49789810180664,
+      "gpu_memory_used": 3662.0,
+      "relative_time": 3.3292760848999023
+    },
+    {
+      "timestamp": "2025-01-04T02:37:56.511211",
+      "cpu_percent": 14.69,
+      "ram_percent": 52.7,
+      "ram_used_gb": 33.494102478027344,
+      "gpu_memory_used": 3668.0,
+      "relative_time": 4.397106885910034
+    },
+    {
+      "timestamp": "2025-01-04T02:37:57.582176",
+      "cpu_percent": 11.01,
+      "ram_percent": 52.8,
+      "ram_used_gb": 33.564491271972656,
+      "gpu_memory_used": 3665.0,
+      "relative_time": 5.46670126914978
+    },
+    {
+      "timestamp": "2025-01-04T02:37:58.637969",
+      "cpu_percent": 15.04,
+      "ram_percent": 52.8,
+      "ram_used_gb": 33.555362701416016,
+      "gpu_memory_used": 3668.0,
+      "relative_time": 6.523184061050415
+    },
+    {
+      "timestamp": "2025-01-04T02:37:59.700880",
+      "cpu_percent": 13.32,
+      "ram_percent": 52.8,
+      "ram_used_gb": 33.559967041015625,
+      "gpu_memory_used": 3668.0,
+      "relative_time": 7.589032888412476
+    },
+    {
+      "timestamp": "2025-01-04T02:38:00.773895",
+      "cpu_percent": 12.45,
+      "ram_percent": 52.8,
+      "ram_used_gb": 33.609134674072266,
+      "gpu_memory_used": 3667.0,
+      "relative_time": 8.677486181259155
+    },
+    {
+      "timestamp": "2025-01-04T02:38:01.851195",
+      "cpu_percent": 12.62,
+      "ram_percent": 52.9,
+      "ram_used_gb": 33.67635726928711,
+      "gpu_memory_used": 3665.0,
+      "relative_time": 9.734971046447754
+    },
+    {
+      "timestamp": "2025-01-04T02:38:02.907897",
+      "cpu_percent": 20.61,
+      "ram_percent": 53.0,
+      "ram_used_gb": 33.72555160522461,
+      "gpu_memory_used": 3660.0,
+      "relative_time": 10.813292026519775
+    },
+    {
+      "timestamp": "2025-01-04T02:38:03.996322",
+      "cpu_percent": 33.24,
+      "ram_percent": 53.2,
+      "ram_used_gb": 33.832088470458984,
+      "gpu_memory_used": 3660.0,
+      "relative_time": 11.917856454849243
+    },
+    {
+      "timestamp": "2025-01-04T02:38:05.101973",
+      "cpu_percent": 14.24,
+      "ram_percent": 53.0,
+      "ram_used_gb": 33.7408447265625,
+      "gpu_memory_used": 3662.0,
+      "relative_time": 12.986546277999878
+    },
+    {
+      "timestamp": "2025-01-04T02:38:06.162037",
+      "cpu_percent": 14.38,
+      "ram_percent": 53.1,
+      "ram_used_gb": 33.774169921875,
+      "gpu_memory_used": 3662.0,
+      "relative_time": 14.062608242034912
+    },
+    {
+      "timestamp": "2025-01-04T02:38:07.248210",
+      "cpu_percent": 14.39,
+      "ram_percent": 53.2,
+      "ram_used_gb": 33.83738327026367,
+      "gpu_memory_used": 4029.0,
+      "relative_time": 15.156044960021973
+    },
+    {
+      "timestamp": "2025-01-04T02:38:08.329582",
+      "cpu_percent": 31.18,
+      "ram_percent": 53.2,
+      "ram_used_gb": 33.87126541137695,
+      "gpu_memory_used": 4032.0,
+      "relative_time": 16.249940395355225
+    },
+    {
+      "timestamp": "2025-01-04T02:38:09.432992",
+      "cpu_percent": 19.33,
+      "ram_percent": 53.2,
+      "ram_used_gb": 33.842403411865234,
+      "gpu_memory_used": 4032.0,
+      "relative_time": 17.331223011016846
+    },
+    {
+      "timestamp": "2025-01-04T02:38:10.505101",
+      "cpu_percent": 13.34,
+      "ram_percent": 53.2,
+      "ram_used_gb": 33.86738967895508,
+      "gpu_memory_used": 4029.0,
+      "relative_time": 18.390397548675537
+    },
+    {
+      "timestamp": "2025-01-04T02:38:11.570033",
+      "cpu_percent": 12.61,
+      "ram_percent": 53.4,
+      "ram_used_gb": 33.938289642333984,
+      "gpu_memory_used": 4028.0,
+      "relative_time": 19.477521181106567
+    },
+    {
+      "timestamp": "2025-01-04T02:38:12.663780",
+      "cpu_percent": 15.78,
+      "ram_percent": 53.4,
+      "ram_used_gb": 33.969398498535156,
+      "gpu_memory_used": 4030.0,
+      "relative_time": 20.57425808906555
+    },
+    {
+      "timestamp": "2025-01-04T02:38:13.750065",
+      "cpu_percent": 18.69,
+      "ram_percent": 53.5,
+      "ram_used_gb": 34.03954315185547,
+      "gpu_memory_used": 4021.0,
+      "relative_time": 21.652076244354248
+    },
+    {
+      "timestamp": "2025-01-04T02:38:14.825318",
+      "cpu_percent": 10.48,
+      "ram_percent": 53.6,
+      "ram_used_gb": 34.07048416137695,
+      "gpu_memory_used": 4025.0,
+      "relative_time": 22.73010230064392
+    },
+    {
+      "timestamp": "2025-01-04T02:38:15.912340",
+      "cpu_percent": 12.53,
+      "ram_percent": 53.6,
+      "ram_used_gb": 34.09389877319336,
+      "gpu_memory_used": 4026.0,
+      "relative_time": 23.81609869003296
+    },
+    {
+      "timestamp": "2025-01-04T02:38:17.003329",
+      "cpu_percent": 16.09,
+      "ram_percent": 53.7,
+      "ram_used_gb": 34.1781120300293,
+      "gpu_memory_used": 4025.0,
+      "relative_time": 24.90904140472412
+    },
+    {
+      "timestamp": "2025-01-04T02:38:18.079837",
+      "cpu_percent": 14.98,
+      "ram_percent": 53.8,
+      "ram_used_gb": 34.21260070800781,
+      "gpu_memory_used": 4025.0,
+      "relative_time": 25.986279249191284
+    },
+    {
+      "timestamp": "2025-01-04T02:38:19.167635",
+      "cpu_percent": 14.85,
+      "ram_percent": 53.8,
+      "ram_used_gb": 34.23923873901367,
+      "gpu_memory_used": 4024.0,
+      "relative_time": 27.076823234558105
+    },
+    {
+      "timestamp": "2025-01-04T02:38:20.258141",
+      "cpu_percent": 15.05,
+      "ram_percent": 53.9,
+      "ram_used_gb": 34.26483917236328,
+      "gpu_memory_used": 4015.0,
+      "relative_time": 28.144607067108154
+    },
+    {
+      "timestamp": "2025-01-04T02:38:21.315694",
+      "cpu_percent": 17.08,
+      "ram_percent": 53.9,
+      "ram_used_gb": 34.31473922729492,
+      "gpu_memory_used": 4016.0,
+      "relative_time": 29.20189356803894
+    },
+    {
+      "timestamp": "2025-01-04T02:38:22.388259",
+      "cpu_percent": 17.47,
+      "ram_percent": 54.0,
+      "ram_used_gb": 34.35490798950195,
+      "gpu_memory_used": 4016.0,
+      "relative_time": 30.28918957710266
+    },
+    {
+      "timestamp": "2025-01-04T02:38:23.463469",
+      "cpu_percent": 15.76,
+      "ram_percent": 54.0,
+      "ram_used_gb": 34.33717346191406,
+      "gpu_memory_used": 4002.0,
+      "relative_time": 31.364880561828613
+    },
+    {
+      "timestamp": "2025-01-04T02:38:24.540334",
+      "cpu_percent": 13.54,
+      "ram_percent": 54.1,
+      "ram_used_gb": 34.38197708129883,
+      "gpu_memory_used": 3999.0,
+      "relative_time": 32.4253191947937
+    },
+    {
+      "timestamp": "2025-01-04T02:38:25.597934",
+      "cpu_percent": 13.99,
+      "ram_percent": 54.2,
+      "ram_used_gb": 34.48365783691406,
+      "gpu_memory_used": 4004.0,
+      "relative_time": 33.50029754638672
+    },
+    {
+      "timestamp": "2025-01-04T02:38:26.673108",
+      "cpu_percent": 15.16,
+      "ram_percent": 54.2,
+      "ram_used_gb": 34.50083923339844,
+      "gpu_memory_used": 4011.0,
+      "relative_time": 34.5756139755249
+    },
+    {
+      "timestamp": "2025-01-04T02:38:27.748147",
+      "cpu_percent": 17.68,
+      "ram_percent": 54.2,
+      "ram_used_gb": 34.49884033203125,
+      "gpu_memory_used": 4016.0,
+      "relative_time": 35.650988817214966
+    },
+    {
+      "timestamp": "2025-01-04T02:38:28.835603",
+      "cpu_percent": 26.81,
+      "ram_percent": 54.3,
+      "ram_used_gb": 34.536773681640625,
+      "gpu_memory_used": 4015.0,
+      "relative_time": 36.73981595039368
+    },
+    {
+      "timestamp": "2025-01-04T02:38:29.912604",
+      "cpu_percent": 27.61,
+      "ram_percent": 54.3,
+      "ram_used_gb": 34.56916427612305,
+      "gpu_memory_used": 4016.0,
+      "relative_time": 37.81279993057251
+    },
+    {
+      "timestamp": "2025-01-04T02:38:30.984988",
+      "cpu_percent": 34.24,
+      "ram_percent": 54.4,
+      "ram_used_gb": 34.599365234375,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 38.89973425865173
+    },
+    {
+      "timestamp": "2025-01-04T02:38:32.071596",
+      "cpu_percent": 31.95,
+      "ram_percent": 54.2,
+      "ram_used_gb": 34.46506881713867,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 39.95902729034424
+    },
+    {
+      "timestamp": "2025-01-04T02:38:33.140836",
+      "cpu_percent": 27.78,
+      "ram_percent": 54.3,
+      "ram_used_gb": 34.51242446899414,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 41.0454580783844
+    },
+    {
+      "timestamp": "2025-01-04T02:38:34.229919",
+      "cpu_percent": 21.09,
+      "ram_percent": 54.3,
+      "ram_used_gb": 34.513973236083984,
+      "gpu_memory_used": 4011.0,
+      "relative_time": 42.133435010910034
+    },
+    {
+      "timestamp": "2025-01-04T02:38:35.317486",
+      "cpu_percent": 17.26,
+      "ram_percent": 53.9,
+      "ram_used_gb": 34.3167839050293,
+      "gpu_memory_used": 4020.0,
+      "relative_time": 43.21739077568054
+    },
+    {
+      "timestamp": "2025-01-04T02:38:36.394375",
+      "cpu_percent": 12.32,
+      "ram_percent": 54.0,
+      "ram_used_gb": 34.34043884277344,
+      "gpu_memory_used": 4020.0,
+      "relative_time": 44.27889919281006
+    },
+    {
+      "timestamp": "2025-01-04T02:38:37.454005",
+      "cpu_percent": 12.46,
+      "ram_percent": 54.0,
+      "ram_used_gb": 34.37453842163086,
+      "gpu_memory_used": 4020.0,
+      "relative_time": 45.341508626937866
+    },
+    {
+      "timestamp": "2025-01-04T02:38:38.515337",
+      "cpu_percent": 14.16,
+      "ram_percent": 54.1,
+      "ram_used_gb": 34.401729583740234,
+      "gpu_memory_used": 4019.0,
+      "relative_time": 46.410696506500244
+    },
+    {
+      "timestamp": "2025-01-04T02:38:39.593044",
+      "cpu_percent": 13.71,
+      "ram_percent": 54.1,
+      "ram_used_gb": 34.435630798339844,
+      "gpu_memory_used": 4019.0,
+      "relative_time": 47.48556661605835
+    },
+    {
+      "timestamp": "2025-01-04T02:38:40.665509",
+      "cpu_percent": 13.17,
+      "ram_percent": 54.2,
+      "ram_used_gb": 34.49795150756836,
+      "gpu_memory_used": 4016.0,
+      "relative_time": 48.551952838897705
+    },
+    {
+      "timestamp": "2025-01-04T02:38:41.724929",
+      "cpu_percent": 12.67,
+      "ram_percent": 54.3,
+      "ram_used_gb": 34.52568054199219,
+      "gpu_memory_used": 4011.0,
+      "relative_time": 49.61591196060181
+    },
+    {
+      "timestamp": "2025-01-04T02:38:42.801080",
+      "cpu_percent": 12.83,
+      "ram_percent": 54.4,
+      "ram_used_gb": 34.579071044921875,
+      "gpu_memory_used": 4007.0,
+      "relative_time": 50.70357823371887
+    },
+    {
+      "timestamp": "2025-01-04T02:38:43.884984",
+      "cpu_percent": 12.31,
+      "ram_percent": 54.4,
+      "ram_used_gb": 34.59829330444336,
+      "gpu_memory_used": 4003.0,
+      "relative_time": 51.771891832351685
+    },
+    {
+      "timestamp": "2025-01-04T02:38:44.957477",
+      "cpu_percent": 12.58,
+      "ram_percent": 54.7,
+      "ram_used_gb": 34.76633071899414,
+      "gpu_memory_used": 4003.0,
+      "relative_time": 52.859192848205566
+    },
+    {
+      "timestamp": "2025-01-04T02:38:46.031581",
+      "cpu_percent": 14.48,
+      "ram_percent": 54.6,
+      "ram_used_gb": 34.76308059692383,
+      "gpu_memory_used": 4013.0,
+      "relative_time": 53.91648840904236
+    },
+    {
+      "timestamp": "2025-01-04T02:38:47.091693",
+      "cpu_percent": 14.35,
+      "ram_percent": 54.7,
+      "ram_used_gb": 34.81193923950195,
+      "gpu_memory_used": 4013.0,
+      "relative_time": 54.993882179260254
+    },
+    {
+      "timestamp": "2025-01-04T02:38:48.178826",
+      "cpu_percent": 16.46,
+      "ram_percent": 54.7,
+      "ram_used_gb": 34.784278869628906,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 56.064146518707275
+    },
+    {
+      "timestamp": "2025-01-04T02:38:49.235997",
+      "cpu_percent": 12.84,
+      "ram_percent": 54.7,
+      "ram_used_gb": 34.79767608642578,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 57.12374472618103
+    },
+    {
+      "timestamp": "2025-01-04T02:38:50.295962",
+      "cpu_percent": 15.69,
+      "ram_percent": 54.8,
+      "ram_used_gb": 34.8546257019043,
+      "gpu_memory_used": 4013.0,
+      "relative_time": 58.180296421051025
+    },
+    {
+      "timestamp": "2025-01-04T02:38:51.357678",
+      "cpu_percent": 14.54,
+      "ram_percent": 54.8,
+      "ram_used_gb": 34.8900260925293,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 59.242270708084106
+    },
+    {
+      "timestamp": "2025-01-04T02:38:52.415380",
+      "cpu_percent": 14.74,
+      "ram_percent": 54.9,
+      "ram_used_gb": 34.92173767089844,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 60.307114601135254
+    },
+    {
+      "timestamp": "2025-01-04T02:38:53.490598",
+      "cpu_percent": 13.82,
+      "ram_percent": 55.1,
+      "ram_used_gb": 35.028907775878906,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 61.37576389312744
+    },
+    {
+      "timestamp": "2025-01-04T02:38:54.548660",
+      "cpu_percent": 11.31,
+      "ram_percent": 55.1,
+      "ram_used_gb": 35.05375289916992,
+      "gpu_memory_used": 4011.0,
+      "relative_time": 62.43392610549927
+    },
+    {
+      "timestamp": "2025-01-04T02:38:55.609900",
+      "cpu_percent": 14.35,
+      "ram_percent": 55.1,
+      "ram_used_gb": 35.03831100463867,
+      "gpu_memory_used": 4011.0,
+      "relative_time": 63.493370056152344
+    },
+    {
+      "timestamp": "2025-01-04T02:38:56.666032",
+      "cpu_percent": 13.11,
+      "ram_percent": 55.1,
+      "ram_used_gb": 35.07795333862305,
+      "gpu_memory_used": 4011.0,
+      "relative_time": 64.54955720901489
+    },
+    {
+      "timestamp": "2025-01-04T02:38:57.730782",
+      "cpu_percent": 16.01,
+      "ram_percent": 55.2,
+      "ram_used_gb": 35.11598587036133,
+      "gpu_memory_used": 4007.0,
+      "relative_time": 65.61445665359497
+    },
+    {
+      "timestamp": "2025-01-04T02:38:58.787051",
+      "cpu_percent": 13.68,
+      "ram_percent": 55.3,
+      "ram_used_gb": 35.15185546875,
+      "gpu_memory_used": 4007.0,
+      "relative_time": 66.67095923423767
+    },
+    {
+      "timestamp": "2025-01-04T02:38:59.847606",
+      "cpu_percent": 13.08,
+      "ram_percent": 55.3,
+      "ram_used_gb": 35.183753967285156,
+      "gpu_memory_used": 4003.0,
+      "relative_time": 67.73307466506958
+    },
+    {
+      "timestamp": "2025-01-04T02:39:00.918871",
+      "cpu_percent": 13.26,
+      "ram_percent": 55.4,
+      "ram_used_gb": 35.22275161743164,
+      "gpu_memory_used": 4005.0,
+      "relative_time": 68.80590057373047
+    },
+    {
+      "timestamp": "2025-01-04T02:39:01.991170",
+      "cpu_percent": 11.05,
+      "ram_percent": 55.3,
+      "ram_used_gb": 35.15507125854492,
+      "gpu_memory_used": 4005.0,
+      "relative_time": 69.8813705444336
+    },
+    {
+      "timestamp": "2025-01-04T02:39:03.058942",
+      "cpu_percent": 11.08,
+      "ram_percent": 55.4,
+      "ram_used_gb": 35.2095947265625,
+      "gpu_memory_used": 4005.0,
+      "relative_time": 70.94484400749207
+    },
+    {
+      "timestamp": "2025-01-04T02:39:04.116159",
+      "cpu_percent": 12.74,
+      "ram_percent": 55.4,
+      "ram_used_gb": 35.24392318725586,
+      "gpu_memory_used": 4005.0,
+      "relative_time": 72.00281810760498
+    },
+    {
+      "timestamp": "2025-01-04T02:39:05.187410",
+      "cpu_percent": 11.04,
+      "ram_percent": 55.4,
+      "ram_used_gb": 35.259830474853516,
+      "gpu_memory_used": 4005.0,
+      "relative_time": 73.07217526435852
+    },
+    {
+      "timestamp": "2025-01-04T02:39:06.248588",
+      "cpu_percent": 13.66,
+      "ram_percent": 55.5,
+      "ram_used_gb": 35.29854965209961,
+      "gpu_memory_used": 4005.0,
+      "relative_time": 74.13533973693848
+    },
+    {
+      "timestamp": "2025-01-04T02:39:07.321179",
+      "cpu_percent": 11.16,
+      "ram_percent": 55.6,
+      "ram_used_gb": 35.346981048583984,
+      "gpu_memory_used": 4005.0,
+      "relative_time": 75.18772435188293
+    },
+    {
+      "timestamp": "2025-01-04T02:39:08.362496",
+      "cpu_percent": 9.46,
+      "ram_percent": 56.1,
+      "ram_used_gb": 35.69393539428711,
+      "gpu_memory_used": 4006.0,
+      "relative_time": 76.25136637687683
+    },
+    {
+      "timestamp": "2025-01-04T02:39:09.438218",
+      "cpu_percent": 15.88,
+      "ram_percent": 56.1,
+      "ram_used_gb": 35.6658821105957,
+      "gpu_memory_used": 4006.0,
+      "relative_time": 77.32329249382019
+    },
+    {
+      "timestamp": "2025-01-04T02:39:10.495653",
+      "cpu_percent": 13.25,
+      "ram_percent": 56.1,
+      "ram_used_gb": 35.658119201660156,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 78.38210940361023
+    },
+    {
+      "timestamp": "2025-01-04T02:39:11.560094",
+      "cpu_percent": 10.8,
+      "ram_percent": 56.1,
+      "ram_used_gb": 35.694610595703125,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 79.44816374778748
+    },
+    {
+      "timestamp": "2025-01-04T02:39:12.621879",
+      "cpu_percent": 12.58,
+      "ram_percent": 56.1,
+      "ram_used_gb": 35.68545150756836,
+      "gpu_memory_used": 4014.0,
+      "relative_time": 80.51017951965332
+    },
+    {
+      "timestamp": "2025-01-04T02:39:13.692834",
+      "cpu_percent": 13.3,
+      "ram_percent": 56.2,
+      "ram_used_gb": 35.730979919433594,
+      "gpu_memory_used": 4012.0,
+      "relative_time": 81.57789969444275
+    },
+    {
+      "timestamp": "2025-01-04T02:39:14.753401",
+      "cpu_percent": 14.75,
+      "ram_percent": 56.2,
+      "ram_used_gb": 35.73103332519531,
+      "gpu_memory_used": 4012.0,
+      "relative_time": 82.63830900192261
+    },
+    {
+      "timestamp": "2025-01-04T02:39:15.811385",
+      "cpu_percent": 14.69,
+      "ram_percent": 56.2,
+      "ram_used_gb": 35.740108489990234,
+      "gpu_memory_used": 4011.0,
+      "relative_time": 83.69796371459961
+    },
+    {
+      "timestamp": "2025-01-04T02:39:16.874197",
+      "cpu_percent": 14.28,
+      "ram_percent": 56.2,
+      "ram_used_gb": 35.767982482910156,
+      "gpu_memory_used": 4010.0,
+      "relative_time": 84.76145887374878
+    },
+    {
+      "timestamp": "2025-01-04T02:39:17.936944",
+      "cpu_percent": 12.99,
+      "ram_percent": 56.3,
+      "ram_used_gb": 35.81233596801758,
+      "gpu_memory_used": 4010.0,
+      "relative_time": 85.84119439125061
+    },
+    {
+      "timestamp": "2025-01-04T02:39:19.027320",
+      "cpu_percent": 12.79,
+      "ram_percent": 56.6,
+      "ram_used_gb": 36.0085563659668,
+      "gpu_memory_used": 4010.0,
+      "relative_time": 86.91442775726318
+    },
+    {
+      "timestamp": "2025-01-04T02:39:20.088804",
+      "cpu_percent": 15.62,
+      "ram_percent": 56.9,
+      "ram_used_gb": 36.16616439819336,
+      "gpu_memory_used": 4006.0,
+      "relative_time": 88.00494360923767
+    },
+    {
+      "timestamp": "2025-01-04T02:39:21.191796",
+      "cpu_percent": 12.98,
+      "ram_percent": 57.1,
+      "ram_used_gb": 36.3217658996582,
+      "gpu_memory_used": 4005.0,
+      "relative_time": 89.12522411346436
+    },
+    {
+      "timestamp": "2025-01-04T02:39:22.311508",
+      "cpu_percent": 14.77,
+      "ram_percent": 56.9,
+      "ram_used_gb": 36.191429138183594,
+      "gpu_memory_used": 4005.0,
+      "relative_time": 90.19932198524475
+    },
+    {
+      "timestamp": "2025-01-04T02:39:23.372871",
+      "cpu_percent": 12.98,
+      "ram_percent": 57.1,
+      "ram_used_gb": 36.29658889770508,
+      "gpu_memory_used": 4005.0,
+      "relative_time": 91.3045928478241
+    },
+    {
+      "timestamp": "2025-01-04T02:39:24.479087",
+      "cpu_percent": 14.64,
+      "ram_percent": 57.1,
+      "ram_used_gb": 36.30413055419922,
+      "gpu_memory_used": 3998.0,
+      "relative_time": 92.36360597610474
+    },
+    {
+      "timestamp": "2025-01-04T02:39:25.537969",
+      "cpu_percent": 14.51,
+      "ram_percent": 57.1,
+      "ram_used_gb": 36.311763763427734,
+      "gpu_memory_used": 3998.0,
+      "relative_time": 93.42230415344238
+    },
+    {
+      "timestamp": "2025-01-04T02:39:26.594967",
+      "cpu_percent": 13.05,
+      "ram_percent": 57.1,
+      "ram_used_gb": 36.351402282714844,
+      "gpu_memory_used": 3998.0,
+      "relative_time": 94.47847175598145
+    },
+    {
+      "timestamp": "2025-01-04T02:39:27.652223",
+      "cpu_percent": 15.05,
+      "ram_percent": 57.2,
+      "ram_used_gb": 36.36949157714844,
+      "gpu_memory_used": 4004.0,
+      "relative_time": 95.53560948371887
+    },
+    {
+      "timestamp": "2025-01-04T02:39:28.708008",
+      "cpu_percent": 12.51,
+      "ram_percent": 57.2,
+      "ram_used_gb": 36.3841667175293,
+      "gpu_memory_used": 4004.0,
+      "relative_time": 96.59472155570984
+    },
+    {
+      "timestamp": "2025-01-04T02:39:29.768866",
+      "cpu_percent": 10.83,
+      "ram_percent": 57.2,
+      "ram_used_gb": 36.39939880371094,
+      "gpu_memory_used": 4004.0,
+      "relative_time": 97.6679356098175
+    },
+    {
+      "timestamp": "2025-01-04T02:39:30.844295",
+      "cpu_percent": 14.61,
+      "ram_percent": 57.3,
+      "ram_used_gb": 36.42519760131836,
+      "gpu_memory_used": 4004.0,
+      "relative_time": 98.74996089935303
+    },
+    {
+      "timestamp": "2025-01-04T02:39:31.934080",
+      "cpu_percent": 11.74,
+      "ram_percent": 57.0,
+      "ram_used_gb": 36.271087646484375,
+      "gpu_memory_used": 4004.0,
+      "relative_time": 99.81860518455505
+    },
+    {
+      "timestamp": "2025-01-04T02:39:32.989954",
+      "cpu_percent": 12.09,
+      "ram_percent": 57.2,
+      "ram_used_gb": 36.368350982666016,
+      "gpu_memory_used": 4010.0,
+      "relative_time": 100.87712931632996
+    },
+    {
+      "timestamp": "2025-01-04T02:39:34.061411",
+      "cpu_percent": 11.07,
+      "ram_percent": 57.2,
+      "ram_used_gb": 36.38072967529297,
+      "gpu_memory_used": 4010.0,
+      "relative_time": 101.946035861969
+    },
+    {
+      "timestamp": "2025-01-04T02:39:35.117182",
+      "cpu_percent": 9.32,
+      "ram_percent": 57.2,
+      "ram_used_gb": 36.367733001708984,
+      "gpu_memory_used": 4415.0,
+      "relative_time": 103.00355505943298
+    },
+    {
+      "timestamp": "2025-01-04T02:39:36.179256",
+      "cpu_percent": 12.93,
+      "ram_percent": 57.2,
+      "ram_used_gb": 36.396636962890625,
+      "gpu_memory_used": 4417.0,
+      "relative_time": 104.06347131729126
+    },
+    {
+      "timestamp": "2025-01-04T02:39:37.237454",
+      "cpu_percent": 10.94,
+      "ram_percent": 57.3,
+      "ram_used_gb": 36.429630279541016,
+      "gpu_memory_used": 4417.0,
+      "relative_time": 105.12580728530884
+    },
+    {
+      "timestamp": "2025-01-04T02:39:38.310321",
+      "cpu_percent": 12.86,
+      "ram_percent": 57.3,
+      "ram_used_gb": 36.44291305541992,
+      "gpu_memory_used": 4418.0,
+      "relative_time": 106.17753839492798
+    },
+    {
+      "timestamp": "2025-01-04T02:39:39.355358",
+      "cpu_percent": 10.82,
+      "ram_percent": 57.3,
+      "ram_used_gb": 36.46603012084961,
+      "gpu_memory_used": 4418.0,
+      "relative_time": 107.24251008033752
+    },
+    {
+      "timestamp": "2025-01-04T02:39:40.413524",
+      "cpu_percent": 14.64,
+      "ram_percent": 57.4,
+      "ram_used_gb": 36.507179260253906,
+      "gpu_memory_used": 4418.0,
+      "relative_time": 108.29774165153503
+    },
+    {
+      "timestamp": "2025-01-04T02:39:41.482368",
+      "cpu_percent": 10.03,
+      "ram_percent": 58.1,
+      "ram_used_gb": 36.93812942504883,
+      "gpu_memory_used": 4418.0,
+      "relative_time": 109.36836910247803
+    },
+    {
+      "timestamp": "2025-01-04T02:39:42.546204",
+      "cpu_percent": 12.63,
+      "ram_percent": 58.0,
+      "ram_used_gb": 36.87542724609375,
+      "gpu_memory_used": 4418.0,
+      "relative_time": 110.43055510520935
+    },
+    {
+      "timestamp": "2025-01-04T02:39:43.604666",
+      "cpu_percent": 14.14,
+      "ram_percent": 58.0,
+      "ram_used_gb": 36.875328063964844,
+      "gpu_memory_used": 4426.0,
+      "relative_time": 111.49229407310486
+    },
+    {
+      "timestamp": "2025-01-04T02:39:44.664973",
+      "cpu_percent": 10.64,
+      "ram_percent": 58.0,
+      "ram_used_gb": 36.88217544555664,
+      "gpu_memory_used": 4425.0,
+      "relative_time": 112.55481696128845
+    },
+    {
+      "timestamp": "2025-01-04T02:39:45.741305",
+      "cpu_percent": 13.92,
+      "ram_percent": 57.9,
+      "ram_used_gb": 36.85449981689453,
+      "gpu_memory_used": 4425.0,
+      "relative_time": 113.62504053115845
+    },
+    {
+      "timestamp": "2025-01-04T02:39:46.799656",
+      "cpu_percent": 13.15,
+      "ram_percent": 58.0,
+      "ram_used_gb": 36.870826721191406,
+      "gpu_memory_used": 4423.0,
+      "relative_time": 114.6845052242279
+    },
+    {
+      "timestamp": "2025-01-04T02:39:47.859725",
+      "cpu_percent": 13.85,
+      "ram_percent": 58.0,
+      "ram_used_gb": 36.870948791503906,
+      "gpu_memory_used": 4423.0,
+      "relative_time": 115.74664235115051
+    },
+    {
+      "timestamp": "2025-01-04T02:39:48.919071",
+      "cpu_percent": 14.59,
+      "ram_percent": 58.0,
+      "ram_used_gb": 36.886802673339844,
+      "gpu_memory_used": 4422.0,
+      "relative_time": 116.80267906188965
+    },
+    {
+      "timestamp": "2025-01-04T02:39:49.976541",
+      "cpu_percent": 16.56,
+      "ram_percent": 58.0,
+      "ram_used_gb": 36.90068435668945,
+      "gpu_memory_used": 4422.0,
+      "relative_time": 117.86520886421204
+    },
+    {
+      "timestamp": "2025-01-04T02:39:51.036593",
+      "cpu_percent": 10.33,
+      "ram_percent": 58.1,
+      "ram_used_gb": 36.96821212768555,
+      "gpu_memory_used": 4416.0,
+      "relative_time": 118.92232513427734
+    },
+    {
+      "timestamp": "2025-01-04T02:39:52.098186",
+      "cpu_percent": 15.23,
+      "ram_percent": 58.1,
+      "ram_used_gb": 36.96358108520508,
+      "gpu_memory_used": 4416.0,
+      "relative_time": 119.98378920555115
+    },
+    {
+      "timestamp": "2025-01-04T02:39:53.168566",
+      "cpu_percent": 11.96,
+      "ram_percent": 58.2,
+      "ram_used_gb": 37.00669479370117,
+      "gpu_memory_used": 4416.0,
+      "relative_time": 121.05223441123962
+    },
+    {
+      "timestamp": "2025-01-04T02:39:54.230292",
+      "cpu_percent": 13.03,
+      "ram_percent": 58.2,
+      "ram_used_gb": 37.031307220458984,
+      "gpu_memory_used": 4416.0,
+      "relative_time": 122.11563086509705
+    },
+    {
+      "timestamp": "2025-01-04T02:39:55.287670",
+      "cpu_percent": 14.93,
+      "ram_percent": 58.2,
+      "ram_used_gb": 37.045589447021484,
+      "gpu_memory_used": 4416.0,
+      "relative_time": 123.17237305641174
+    },
+    {
+      "timestamp": "2025-01-04T02:39:56.349816",
+      "cpu_percent": 12.91,
+      "ram_percent": 58.3,
+      "ram_used_gb": 37.072689056396484,
+      "gpu_memory_used": 4416.0,
+      "relative_time": 124.23265671730042
+    },
+    {
+      "timestamp": "2025-01-04T02:39:57.409384",
+      "cpu_percent": 11.35,
+      "ram_percent": 58.3,
+      "ram_used_gb": 37.10455322265625,
+      "gpu_memory_used": 4416.0,
+      "relative_time": 125.29221749305725
+    },
+    {
+      "timestamp": "2025-01-04T02:39:58.464653",
+      "cpu_percent": 12.97,
+      "ram_percent": 58.4,
+      "ram_used_gb": 37.12955093383789,
+      "gpu_memory_used": 4416.0,
+      "relative_time": 126.34849739074707
+    },
+    {
+      "timestamp": "2025-01-04T02:39:59.521737",
+      "cpu_percent": 11.69,
+      "ram_percent": 58.4,
+      "ram_used_gb": 37.139190673828125,
+      "gpu_memory_used": 4416.0,
+      "relative_time": 127.40602111816406
+    },
+    {
+      "timestamp": "2025-01-04T02:40:00.581455",
+      "cpu_percent": 12.86,
+      "ram_percent": 58.5,
+      "ram_used_gb": 37.204673767089844,
+      "gpu_memory_used": 4418.0,
+      "relative_time": 128.5798671245575
+    },
+    {
+      "timestamp": "2025-01-04T02:40:01.760893",
+      "cpu_percent": 81.59,
+      "ram_percent": 58.1,
+      "ram_used_gb": 36.97315216064453,
+      "gpu_memory_used": 4425.0,
+      "relative_time": 129.6643455028534
+    },
+    {
+      "timestamp": "2025-01-04T02:40:02.850000",
+      "cpu_percent": 14.55,
+      "ram_percent": 58.2,
+      "ram_used_gb": 37.0354118347168,
+      "gpu_memory_used": 4435.0,
+      "relative_time": 130.7529788017273
+    },
+    {
+      "timestamp": "2025-01-04T02:40:03.934586",
+      "cpu_percent": 13.16,
+      "ram_percent": 58.2,
+      "ram_used_gb": 37.03787612915039,
+      "gpu_memory_used": 4437.0,
+      "relative_time": 131.81812405586243
+    },
+    {
+      "timestamp": "2025-01-04T02:40:04.989915",
+      "cpu_percent": 11.0,
+      "ram_percent": 58.3,
+      "ram_used_gb": 37.09538650512695,
+      "gpu_memory_used": 4437.0,
+      "relative_time": 132.88244915008545
+    },
+    {
+      "timestamp": "2025-01-04T02:40:06.067559",
+      "cpu_percent": 32.79,
+      "ram_percent": 58.3,
+      "ram_used_gb": 37.07184982299805,
+      "gpu_memory_used": 4437.0,
+      "relative_time": 133.97513842582703
+    },
+    {
+      "timestamp": "2025-01-04T02:40:07.156881",
+      "cpu_percent": 22.34,
+      "ram_percent": 58.3,
+      "ram_used_gb": 37.07517623901367,
+      "gpu_memory_used": 4438.0,
+      "relative_time": 135.04176831245422
+    },
+    {
+      "timestamp": "2025-01-04T02:40:08.212791",
+      "cpu_percent": 14.86,
+      "ram_percent": 58.3,
+      "ram_used_gb": 37.08013153076172,
+      "gpu_memory_used": 4438.0,
+      "relative_time": 136.1427457332611
+    },
+    {
+      "timestamp": "2025-01-04T02:40:09.317884",
+      "cpu_percent": 21.55,
+      "ram_percent": 58.4,
+      "ram_used_gb": 37.144142150878906,
+      "gpu_memory_used": 4447.0,
+      "relative_time": 137.20455360412598
+    },
+    {
+      "timestamp": "2025-01-04T02:40:10.390292",
+      "cpu_percent": 26.97,
+      "ram_percent": 58.4,
+      "ram_used_gb": 37.141868591308594,
+      "gpu_memory_used": 4454.0,
+      "relative_time": 138.2930736541748
+    },
+    {
+      "timestamp": "2025-01-04T02:40:11.464548",
+      "cpu_percent": 12.21,
+      "ram_percent": 58.5,
+      "ram_used_gb": 37.205867767333984,
+      "gpu_memory_used": 4451.0,
+      "relative_time": 139.35198616981506
+    },
+    {
+      "timestamp": "2025-01-04T02:40:12.537687",
+      "cpu_percent": 13.14,
+      "ram_percent": 58.5,
+      "ram_used_gb": 37.233299255371094,
+      "gpu_memory_used": 4452.0,
+      "relative_time": 140.4236707687378
+    },
+    {
+      "timestamp": "2025-01-04T02:40:13.608158",
+      "cpu_percent": 13.93,
+      "ram_percent": 58.6,
+      "ram_used_gb": 37.25624465942383,
+      "gpu_memory_used": 4452.0,
+      "relative_time": 141.4932518005371
+    },
+    {
+      "timestamp": "2025-01-04T02:40:14.668199",
+      "cpu_percent": 15.76,
+      "ram_percent": 58.6,
+      "ram_used_gb": 37.278499603271484,
+      "gpu_memory_used": 4452.0,
+      "relative_time": 142.57055759429932
+    },
+    {
+      "timestamp": "2025-01-04T02:40:15.754506",
+      "cpu_percent": 14.59,
+      "ram_percent": 58.5,
+      "ram_used_gb": 37.21017837524414,
+      "gpu_memory_used": 4451.0,
+      "relative_time": 143.64187097549438
+    },
+    {
+      "timestamp": "2025-01-04T02:40:16.827392",
+      "cpu_percent": 17.55,
+      "ram_percent": 58.4,
+      "ram_used_gb": 37.1205940246582,
+      "gpu_memory_used": 4450.0,
+      "relative_time": 144.75147438049316
+    },
+    {
+      "timestamp": "2025-01-04T02:40:17.929858",
+      "cpu_percent": 55.09,
+      "ram_percent": 58.2,
+      "ram_used_gb": 37.04216766357422,
+      "gpu_memory_used": 4449.0,
+      "relative_time": 145.815936088562
+    },
+    {
+      "timestamp": "2025-01-04T02:40:18.988009",
+      "cpu_percent": 13.92,
+      "ram_percent": 58.0,
+      "ram_used_gb": 36.90068054199219,
+      "gpu_memory_used": 4447.0,
+      "relative_time": 146.8880341053009
+    },
+    {
+      "timestamp": "2025-01-04T02:40:20.062567",
+      "cpu_percent": 17.42,
+      "ram_percent": 57.7,
+      "ram_used_gb": 36.69455337524414,
+      "gpu_memory_used": 4446.0,
+      "relative_time": 147.96440315246582
+    },
+    {
+      "timestamp": "2025-01-04T02:40:21.149129",
+      "cpu_percent": 14.78,
+      "ram_percent": 57.4,
+      "ram_used_gb": 36.50687789916992,
+      "gpu_memory_used": 4445.0,
+      "relative_time": 149.041100025177
+    },
+    {
+      "timestamp": "2025-01-04T02:40:22.221780",
+      "cpu_percent": 11.1,
+      "ram_percent": 57.0,
+      "ram_used_gb": 36.28267288208008,
+      "gpu_memory_used": 4438.0,
+      "relative_time": 150.125506401062
+    },
+    {
+      "timestamp": "2025-01-04T02:40:23.308492",
+      "cpu_percent": 12.21,
+      "ram_percent": 56.7,
+      "ram_used_gb": 36.036773681640625,
+      "gpu_memory_used": 4436.0,
+      "relative_time": 151.19524502754211
+    },
+    {
+      "timestamp": "2025-01-04T02:40:24.381177",
+      "cpu_percent": 13.79,
+      "ram_percent": 56.3,
+      "ram_used_gb": 35.83684539794922,
+      "gpu_memory_used": 4436.0,
+      "relative_time": 152.26534175872803
+    },
+    {
+      "timestamp": "2025-01-04T02:40:25.452457",
+      "cpu_percent": 12.28,
+      "ram_percent": 56.4,
+      "ram_used_gb": 35.848087310791016,
+      "gpu_memory_used": 4436.0,
+      "relative_time": 153.33880996704102
+    },
+    {
+      "timestamp": "2025-01-04T02:40:26.521613",
+      "cpu_percent": 12.52,
+      "ram_percent": 56.8,
+      "ram_used_gb": 36.1606330871582,
+      "gpu_memory_used": 4440.0,
+      "relative_time": 154.40920901298523
+    },
+    {
+      "timestamp": "2025-01-04T02:40:27.587547",
+      "cpu_percent": 7.94,
+      "ram_percent": 57.2,
+      "ram_used_gb": 36.37208557128906,
+      "gpu_memory_used": 4440.0,
+      "relative_time": 155.46942234039307
+    },
+    {
+      "timestamp": "2025-01-04T02:40:28.647400",
+      "cpu_percent": 8.85,
+      "ram_percent": 57.3,
+      "ram_used_gb": 36.470054626464844,
+      "gpu_memory_used": 4440.0,
+      "relative_time": 156.53129720687866
+    }
+  ],
+  "test_duration": 159.19756031036377
+}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt
new file mode 100644
index 0000000..cb2df6a
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt
@@ -0,0 +1,23 @@
+=== Benchmark Statistics (with correct RTF) ===
+
+Total tokens processed: 17150
+Total audio generated (s): 5296.38
+Total test duration (s): 155.23
+Average processing rate (tokens/s): 102.86
+Average RTF: 0.03
+Average Real Time Speed: 31.25
+
+=== Per-chunk Stats ===
+
+Average chunk size (tokens): 1715.00
+Min chunk size (tokens): 150
+Max chunk size (tokens): 5000
+Average processing time (s): 15.39
+Average output length (s): 529.64
+
+=== Performance Ranges ===
+
+Processing rate range (tokens/s): 80.65 - 125.10
+RTF range: 0.03x - 0.04x
+Real Time Speed range: 25.00x - 33.33x
+
diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png
new file mode 100644
index 0000000..339c896
Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png
new file mode 100644
index 0000000..3e5c8d1
Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png
new file mode 100644
index 0000000..e209978
Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png differ
diff --git a/examples/benchmarks/format_comparison.png b/examples/assorted_checks/benchmarks/output_plots/format_comparison.png
similarity index 100%
rename from examples/benchmarks/format_comparison.png
rename to examples/assorted_checks/benchmarks/output_plots/format_comparison.png
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png
new file mode 100644
index 0000000..62c6864
Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png
new file mode 100644
index 0000000..1c5d7b7
Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png
new file mode 100644
index 0000000..942b3a8
Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png differ
diff --git a/examples/benchmarks/gpu_usage.png b/examples/assorted_checks/benchmarks/output_plots/gpu_usage.png
similarity index 100%
rename from examples/benchmarks/gpu_usage.png
rename to examples/assorted_checks/benchmarks/output_plots/gpu_usage.png
diff --git a/examples/benchmarks/the_time_machine_hg_wells.txt b/examples/assorted_checks/benchmarks/the_time_machine_hg_wells.txt
similarity index 100%
rename from examples/benchmarks/the_time_machine_hg_wells.txt
rename to examples/assorted_checks/benchmarks/the_time_machine_hg_wells.txt
diff --git a/examples/test_analyze_combined_voices.py b/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py
similarity index 99%
rename from examples/test_analyze_combined_voices.py
rename to examples/assorted_checks/test_combinations/test_analyze_combined_voices.py
index 8db7865..ec280e2 100644
--- a/examples/test_analyze_combined_voices.py
+++ b/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py
@@ -332,8 +332,8 @@ def main():
     )
     parser.add_argument("--url", default="http://localhost:8880", help="API base URL")
     parser.add_argument(
-        "--output-dir",
-        default="examples/output",
+        "--output-dir", 
+        default="examples/assorted_checks/test_combinations/output",
         help="Output directory for audio files",
     )
     args = parser.parse_args()
diff --git a/examples/test_audio_formats.py b/examples/assorted_checks/test_formats/test_audio_formats.py
similarity index 100%
rename from examples/test_audio_formats.py
rename to examples/assorted_checks/test_formats/test_audio_formats.py
diff --git a/examples/test_openai_tts.py b/examples/assorted_checks/test_openai/test_openai_tts.py
similarity index 100%
rename from examples/test_openai_tts.py
rename to examples/assorted_checks/test_openai/test_openai_tts.py
diff --git a/examples/test_all_voices.py b/examples/assorted_checks/test_voices/test_all_voices.py
similarity index 100%
rename from examples/test_all_voices.py
rename to examples/assorted_checks/test_voices/test_all_voices.py
diff --git a/examples/assorted_checks/validate_wav.py b/examples/assorted_checks/validate_wav.py
new file mode 100644
index 0000000..d925e56
--- /dev/null
+++ b/examples/assorted_checks/validate_wav.py
@@ -0,0 +1,231 @@
+import numpy as np
+import soundfile as sf
+import argparse
+from pathlib import Path
+
+def validate_tts(wav_path: str) -> dict:
+    """
+    Quick validation checks for TTS-generated audio files to detect common artifacts.
+
+    Checks for:
+    - Unnatural silence gaps
+    - Audio glitches and artifacts
+    - Repeated speech segments (stuck/looping)
+    - Abrupt changes in speech
+    - Audio quality issues
+
+    Args:
+        wav_path: Path to audio file (wav, mp3, etc)
+    Returns:
+        Dict with "file", formatted stats, "issues" (list of str) and "valid"; on any exception only "file", "error" and "valid" (False)
+    """
+    try:
+        # Load audio
+        audio, sr = sf.read(wav_path)
+        if len(audio.shape) > 1:
+            audio = audio.mean(axis=1)  # Convert to mono
+
+        # Basic audio stats
+        duration = len(audio) / sr
+        rms = np.sqrt(np.mean(audio**2))
+        peak = np.max(np.abs(audio))
+        dc_offset = np.mean(audio)
+
+        # Percentage of samples whose magnitude is at or above 0.99
+        clip_count = np.sum(np.abs(audio) >= 0.99)
+        clip_percent = (clip_count / len(audio)) * 100
+        if clip_percent > 0:
+            clip_stats = f" ({clip_percent:.2e}% of samples near peak)"
+        else:
+            clip_stats = " (no samples near peak)"
+
+        # Convert to dB for analysis
+        eps = np.finfo(float).eps
+        db = 20 * np.log10(np.abs(audio) + eps)
+
+        issues = []
+
+        # Check if audio is too short (likely failed generation)
+        if duration < 0.1:  # Less than 100ms
+            issues.append("WARNING: Audio is suspiciously short - possible failed generation")
+
+        # 1. Check for basic audio quality
+        if peak >= 1.0:
+            # clip_percent (share of samples with magnitude >= 0.99) was
+            # already computed above for the summary string, so it is reused
+            # here instead of being recalculated.
+
+            if clip_percent > 1.0:  # Only warn if more than 1% of samples clip
+                issues.append(f"WARNING: Significant clipping detected ({clip_percent:.2e}% of samples)")
+            elif clip_percent > 0.01:  # Add info if more than 0.01% but less than 1%
+                issues.append(f"INFO: Minor peak limiting detected ({clip_percent:.2e}% of samples) - likely intentional normalization")
+
+        if rms < 0.01:
+            issues.append("WARNING: Audio is very quiet - possible failed generation")
+        if abs(dc_offset) > 0.1:  # DC offset is particularly bad for speech
+            issues.append(f"WARNING: High DC offset ({dc_offset:.3f}) - may cause audio artifacts")
+
+        # 2. Check for long silence gaps (potential TTS failures)
+        silence_threshold = -45  # dB
+        min_silence = 2.0  # Only detect silences longer than 2 seconds
+        window_size = int(min_silence * sr)
+        silence_count = 0
+        last_silence = -1
+
+        # Skip the first 0.2s for silence detection (avoid false positives at start)
+        start_idx = int(0.2 * sr)
+        for i in range(start_idx, len(db) - window_size, window_size):
+            window = db[i:i+window_size]
+            if np.mean(window) < silence_threshold:
+                # Verify the entire window is mostly silence
+                silent_ratio = np.mean(window < silence_threshold)
+                if silent_ratio > 0.9:  # 90% of the window should be below threshold
+                    if last_silence == -1 or (i/sr - last_silence) > 2.0:  # Only count silences more than 2s apart
+                        silence_count += 1
+                        last_silence = i/sr
+                        issues.append(f"WARNING: Long silence detected at {i/sr:.2f}s (duration: {min_silence:.1f}s)")
+
+        if silence_count > 2:  # Only warn if there are multiple long silences
+            issues.append(f"WARNING: Multiple long silences found ({silence_count} total) - possible generation issue")
+
+        # 3. Check for extreme audio artifacts (changes too rapid for natural speech)
+        # Use a longer window to avoid flagging normal phoneme transitions
+        window_size = int(0.02 * sr)  # 20ms window
+        db_smooth = np.convolve(db, np.ones(window_size)/window_size, 'same')
+        db_diff = np.abs(np.diff(db_smooth))
+
+        # Much higher threshold to only catch truly unnatural changes
+        artifact_threshold = 40  # dB
+        min_duration = int(0.01 * sr)  # Minimum 10ms duration
+
+        # Find regions where the smoothed dB change is extreme
+        artifact_points = np.where(db_diff > artifact_threshold)[0]
+
+        if len(artifact_points) > 0:
+            # Group artifacts that are very close together
+            grouped_artifacts = []
+            current_group = [artifact_points[0]]
+
+            for i in range(1, len(artifact_points)):
+                if (artifact_points[i] - current_group[-1]) < min_duration:
+                    current_group.append(artifact_points[i])
+                else:
+                    if len(current_group) * (1/sr) >= 0.01:  # Only keep groups lasting >= 10ms
+                        grouped_artifacts.append(current_group)
+                    current_group = [artifact_points[i]]
+
+            if len(current_group) * (1/sr) >= 0.01:
+                grouped_artifacts.append(current_group)
+
+            # Limit the report to keep the output short
+            for group in grouped_artifacts[:2]:  # First 2 groups in time order (not ranked by severity)
+                center_idx = group[len(group)//2]
+                db_change = db_diff[center_idx]
+                if db_change > 45:  # Only report very extreme changes
+                    issues.append(
+                        f"WARNING: Possible audio artifact at {center_idx/sr:.2f}s "
+                        f"({db_change:.1f}dB change over {len(group)/sr*1000:.0f}ms)"
+                    )
+
+        # 4. Check for repeated speech segments (stuck/looping)
+        # Check both short and long sentence durations at audiobook speed (150-160 wpm)
+        for chunk_duration in [5.0, 10.0]:  # 5s (~12 words) and 10s (~25 words) at ~audiobook speed
+            chunk_size = int(chunk_duration * sr)
+            overlap = int(0.2 * chunk_size)  # 20% overlap between chunks
+
+            for i in range(0, len(audio) - 2*chunk_size, overlap):
+                chunk1 = audio[i:i+chunk_size]
+                chunk2 = audio[i+chunk_size:i+2*chunk_size]
+
+                # Ignore chunks that are mostly silence
+                if np.mean(np.abs(chunk1)) < 0.01 or np.mean(np.abs(chunk2)) < 0.01:
+                    continue
+
+                try:
+                    correlation = np.corrcoef(chunk1, chunk2)[0,1]
+                    if not np.isnan(correlation) and correlation > 0.92:  # Lower threshold for sentence-length chunks
+                        issues.append(
+                            f"WARNING: Possible repeated speech at {i/sr:.1f}s "
+                            f"(~{int(chunk_duration*160/60):d} words, correlation: {correlation:.3f})"
+                        )
+                        break  # Found repetition at this duration, try next duration
+                except Exception:  # Best effort: skip this pair, but let KeyboardInterrupt/SystemExit propagate
+                    continue
+
+        # 5. Check for extreme amplitude discontinuities (common in failed TTS)
+        amplitude_envelope = np.abs(audio)
+        window_size = sr // 10  # 100ms window for smoother envelope
+        smooth_env = np.convolve(amplitude_envelope, np.ones(window_size)/float(window_size), 'same')
+        env_diff = np.abs(np.diff(smooth_env))
+
+        # Only detect very extreme amplitude changes
+        jump_threshold = 0.5  # Much higher threshold
+        jumps = np.where(env_diff > jump_threshold)[0]
+
+        if len(jumps) > 0:
+            # Group jumps that are close together
+            grouped_jumps = []
+            current_group = [jumps[0]]
+
+            for i in range(1, len(jumps)):
+                if (jumps[i] - current_group[-1]) < 0.05 * sr:  # Group within 50ms
+                    current_group.append(jumps[i])
+                else:
+                    if len(current_group) >= 3:  # Only keep significant discontinuities
+                        grouped_jumps.append(current_group)
+                    current_group = [jumps[i]]
+
+            if len(current_group) >= 3:
+                grouped_jumps.append(current_group)
+
+            # Limit the report to keep the output short
+            for group in grouped_jumps[:2]:  # First 2 groups in time order (not ranked by severity)
+                center_idx = group[len(group)//2]
+                jump_size = env_diff[center_idx]
+                if jump_size > 0.6:  # Only report very extreme changes
+                    issues.append(
+                        f"WARNING: Possible audio discontinuity at {center_idx/sr:.2f}s "
+                        f"({jump_size:.2f} amplitude ratio change)"
+                    )
+
+        return {
+            "file": wav_path,
+            "duration": f"{duration:.2f}s",
+            "sample_rate": sr,
+            "peak_amplitude": f"{peak:.3f}{clip_stats}",
+            "rms_level": f"{rms:.3f}",
+            "dc_offset": f"{dc_offset:.3f}",
+            "issues": issues,
+            "valid": len(issues) == 0
+        }
+
+    except Exception as e:
+        return {
+            "file": wav_path,
+            "error": str(e),
+            "valid": False
+        }
+
+if __name__ == "__main__":
+    # Command-line entry point: validate one audio file and print a report.
+    cli = argparse.ArgumentParser(description="TTS Output Validator")
+    cli.add_argument("wav_file", help="Path to audio file to validate")
+    report = validate_tts(cli.parse_args().wav_file)
+
+    print(f"\nValidating: {report['file']}")
+    if "error" not in report:
+        print(f"Duration: {report['duration']}")
+        print(f"Sample Rate: {report['sample_rate']} Hz")
+        print(f"Peak Amplitude: {report['peak_amplitude']}")
+        print(f"RMS Level: {report['rms_level']}")
+        print(f"DC Offset: {report['dc_offset']}")
+
+        found = report["issues"]
+        if not found:
+            print("\nNo issues found")
+        else:
+            print("\nIssues Found:")
+            for message in found:
+                print(f"- {message}")
+    else:
+        print(f"Error: {report['error']}")
diff --git a/examples/assorted_checks/validate_wavs.py b/examples/assorted_checks/validate_wavs.py
new file mode 100644
index 0000000..a37c043
--- /dev/null
+++ b/examples/assorted_checks/validate_wavs.py
@@ -0,0 +1,72 @@
+import argparse
+from pathlib import Path
+from validate_wav import validate_tts
+
+def print_validation_result(result: dict, rel_path: Path):
+    """Write the validation report for one file to stdout, headed by rel_path."""
+    print(f"\nValidating: {rel_path}")
+    if "error" in result:
+        print(f"Error: {result['error']}")
+        return
+    print(f"Duration: {result['duration']}")
+    print(f"Sample Rate: {result['sample_rate']} Hz")
+    print(f"Peak Amplitude: {result['peak_amplitude']}")
+    print(f"RMS Level: {result['rms_level']}")
+    print(f"DC Offset: {result['dc_offset']}")
+
+    if not result["issues"]:
+        print("\nNo issues found")
+        return
+    print("\nIssues Found:")
+    for line in result["issues"]:
+        print(f"- {line}")
+
+def validate_directory(directory: str):
+    """Validate every .wav and .mp3 file found under a directory.
+
+    Prints a detailed report per file, then a one-line-per-file summary.
+    """
+    root = Path(directory)
+
+    # Recursive search; both extensions are merged and sorted by path.
+    audio_files = sorted([*root.rglob("*.wav"), *root.rglob("*.mp3")])
+    if not audio_files:
+        print(f"No .wav or .mp3 files found in {directory}")
+        return
+
+    separator = "=" * 80
+    print(f"Found {len(audio_files)} files in {directory}")
+    print(separator)
+
+    # Per-file detail; outcomes are kept in order for the summary below.
+    outcomes = []
+    for audio_file in audio_files:
+        shown_path = audio_file.relative_to(root)
+        outcome = validate_tts(str(audio_file))
+        print_validation_result(outcome, shown_path)
+        outcomes.append((shown_path, outcome))
+        print(separator)
+
+    # One line per file: ERROR, FAIL (first issue only) or PASS.
+    print("\nSUMMARY:")
+    for shown_path, outcome in outcomes:
+        if "error" in outcome:
+            print(f"{shown_path}: ERROR - {outcome['error']}")
+            continue
+        problems = outcome["issues"]
+        if not problems:
+            print(f"{shown_path}: PASS")
+            continue
+        headline = problems[0].replace("WARNING: ", "")
+        extra = len(problems) - 1
+        if extra > 0:
+            print(f"{shown_path}: FAIL - {headline} (+{extra} more issues)")
+        else:
+            print(f"{shown_path}: FAIL - {headline}")
+
+if __name__ == "__main__":
+    cli = argparse.ArgumentParser(description="Batch validate TTS wav files")
+    cli.add_argument("directory", help="Directory containing wav files to validate")
+    # Single positional argument: the directory tree to scan.
+    target_dir = cli.parse_args().directory
+    validate_directory(target_dir)
diff --git a/examples/benchmarks/analysis_comparison.png b/examples/benchmarks/analysis_comparison.png
deleted file mode 100644
index 87a6d13..0000000
Binary files a/examples/benchmarks/analysis_comparison.png and /dev/null differ
diff --git a/examples/benchmarks/benchmark_results.json b/examples/benchmarks/benchmark_results.json
deleted file mode 100644
index 373cebe..0000000
--- a/examples/benchmarks/benchmark_results.json
+++ /dev/null
@@ -1,531 +0,0 @@
-{
-  "results": [
-    {
-      "tokens": 100,
-      "processing_time": 8.54442310333252,
-      "output_length": 31.15,
-      "realtime_factor": 3.6456527987068887,
-      "elapsed_time": 8.720048666000366
-    },
-    {
-      "tokens": 200,
-      "processing_time": 1.3838517665863037,
-      "output_length": 62.6,
-      "realtime_factor": 45.236058883981606,
-      "elapsed_time": 10.258155345916748
-    },
-    {
-      "tokens": 300,
-      "processing_time": 2.2024788856506348,
-      "output_length": 96.325,
-      "realtime_factor": 43.73481200095347,
-      "elapsed_time": 12.594647407531738
-    },
-    {
-      "tokens": 400,
-      "processing_time": 3.175424098968506,
-      "output_length": 128.55,
-      "realtime_factor": 40.48278150995886,
-      "elapsed_time": 16.005898475646973
-    },
-    {
-      "tokens": 500,
-      "processing_time": 3.205301523208618,
-      "output_length": 158.55,
-      "realtime_factor": 49.46492517224587,
-      "elapsed_time": 19.377076625823975
-    },
-    {
-      "tokens": 600,
-      "processing_time": 3.9976348876953125,
-      "output_length": 189.225,
-      "realtime_factor": 47.33423769700254,
-      "elapsed_time": 23.568575859069824
-    },
-    {
-      "tokens": 700,
-      "processing_time": 4.98036003112793,
-      "output_length": 222.05,
-      "realtime_factor": 44.58513011351734,
-      "elapsed_time": 28.767319917678833
-    },
-    {
-      "tokens": 800,
-      "processing_time": 5.156893491744995,
-      "output_length": 253.825,
-      "realtime_factor": 49.22052402406907,
-      "elapsed_time": 34.1369092464447
-    },
-    {
-      "tokens": 900,
-      "processing_time": 5.8110880851745605,
-      "output_length": 283.75,
-      "realtime_factor": 48.82906537312906,
-      "elapsed_time": 40.16419458389282
-    },
-    {
-      "tokens": 1000,
-      "processing_time": 6.686216354370117,
-      "output_length": 315.45,
-      "realtime_factor": 47.17914935460046,
-      "elapsed_time": 47.11375427246094
-    },
-    {
-      "tokens": 2000,
-      "processing_time": 13.290695905685425,
-      "output_length": 624.925,
-      "realtime_factor": 47.01973504131358,
-      "elapsed_time": 60.842002630233765
-    },
-    {
-      "tokens": 3000,
-      "processing_time": 20.058005571365356,
-      "output_length": 932.05,
-      "realtime_factor": 46.46773063671828,
-      "elapsed_time": 81.50969815254211
-    },
-    {
-      "tokens": 4000,
-      "processing_time": 26.38338828086853,
-      "output_length": 1222.975,
-      "realtime_factor": 46.353978002394015,
-      "elapsed_time": 108.76348638534546
-    },
-    {
-      "tokens": 5000,
-      "processing_time": 32.472310066223145,
-      "output_length": 1525.15,
-      "realtime_factor": 46.967708699801484,
-      "elapsed_time": 142.2994668483734
-    },
-    {
-      "tokens": 6000,
-      "processing_time": 42.67592263221741,
-      "output_length": 1837.525,
-      "realtime_factor": 43.0576514030137,
-      "elapsed_time": 186.26759266853333
-    },
-    {
-      "tokens": 7000,
-      "processing_time": 51.601537466049194,
-      "output_length": 2146.875,
-      "realtime_factor": 41.60486499869347,
-      "elapsed_time": 239.59922289848328
-    },
-    {
-      "tokens": 8000,
-      "processing_time": 51.86434292793274,
-      "output_length": 2458.425,
-      "realtime_factor": 47.401063258741466,
-      "elapsed_time": 293.4462616443634
-    },
-    {
-      "tokens": 9000,
-      "processing_time": 60.4497971534729,
-      "output_length": 2772.1,
-      "realtime_factor": 45.857887545297416,
-      "elapsed_time": 356.02399826049805
-    },
-    {
-      "tokens": 10000,
-      "processing_time": 71.75962543487549,
-      "output_length": 3085.625,
-      "realtime_factor": 42.99945800024164,
-      "elapsed_time": 430.50863671302795
-    },
-    {
-      "tokens": 11000,
-      "processing_time": 96.66409230232239,
-      "output_length": 3389.3,
-      "realtime_factor": 35.062657904030935,
-      "elapsed_time": 529.3296246528625
-    },
-    {
-      "tokens": 12000,
-      "processing_time": 85.70126295089722,
-      "output_length": 3703.175,
-      "realtime_factor": 43.21027336693678,
-      "elapsed_time": 618.0248212814331
-    },
-    {
-      "tokens": 13000,
-      "processing_time": 97.2874686717987,
-      "output_length": 4030.825,
-      "realtime_factor": 41.43210893479068,
-      "elapsed_time": 717.9070522785187
-    },
-    {
-      "tokens": 14000,
-      "processing_time": 105.1045708656311,
-      "output_length": 4356.775,
-      "realtime_factor": 41.451812838566596,
-      "elapsed_time": 826.1140224933624
-    },
-    {
-      "tokens": 15000,
-      "processing_time": 111.0716404914856,
-      "output_length": 4663.325,
-      "realtime_factor": 41.984839508672565,
-      "elapsed_time": 940.0645899772644
-    },
-    {
-      "tokens": 16000,
-      "processing_time": 116.61742973327637,
-      "output_length": 4978.65,
-      "realtime_factor": 42.692160266154104,
-      "elapsed_time": 1061.1957621574402
-    }
-  ],
-  "system_metrics": [
-    {
-      "timestamp": "2024-12-31T03:12:36.009478",
-      "cpu_percent": 8.1,
-      "ram_percent": 66.8,
-      "ram_used_gb": 42.47850799560547,
-      "gpu_memory_used": 2124.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:44.639678",
-      "cpu_percent": 7.7,
-      "ram_percent": 69.1,
-      "ram_used_gb": 43.984352111816406,
-      "gpu_memory_used": 3486.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:44.731107",
-      "cpu_percent": 8.3,
-      "ram_percent": 69.1,
-      "ram_used_gb": 43.97468948364258,
-      "gpu_memory_used": 3484.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:46.189723",
-      "cpu_percent": 14.2,
-      "ram_percent": 69.1,
-      "ram_used_gb": 43.98275375366211,
-      "gpu_memory_used": 3697.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:46.265437",
-      "cpu_percent": 4.7,
-      "ram_percent": 69.1,
-      "ram_used_gb": 43.982975006103516,
-      "gpu_memory_used": 3697.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:48.536216",
-      "cpu_percent": 12.5,
-      "ram_percent": 69.0,
-      "ram_used_gb": 43.86142349243164,
-      "gpu_memory_used": 3697.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:48.603827",
-      "cpu_percent": 6.2,
-      "ram_percent": 69.0,
-      "ram_used_gb": 43.8692626953125,
-      "gpu_memory_used": 3694.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:51.905764",
-      "cpu_percent": 14.2,
-      "ram_percent": 69.1,
-      "ram_used_gb": 43.93961715698242,
-      "gpu_memory_used": 3690.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:52.028178",
-      "cpu_percent": 26.0,
-      "ram_percent": 69.1,
-      "ram_used_gb": 43.944759368896484,
-      "gpu_memory_used": 3690.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:55.320709",
-      "cpu_percent": 13.2,
-      "ram_percent": 69.1,
-      "ram_used_gb": 43.943058013916016,
-      "gpu_memory_used": 3685.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:55.386582",
-      "cpu_percent": 3.2,
-      "ram_percent": 69.1,
-      "ram_used_gb": 43.9305419921875,
-      "gpu_memory_used": 3685.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:59.492304",
-      "cpu_percent": 15.6,
-      "ram_percent": 69.1,
-      "ram_used_gb": 43.964195251464844,
-      "gpu_memory_used": 4053.0
-    },
-    {
-      "timestamp": "2024-12-31T03:12:59.586143",
-      "cpu_percent": 2.1,
-      "ram_percent": 69.1,
-      "ram_used_gb": 43.9642448425293,
-      "gpu_memory_used": 4053.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:04.705286",
-      "cpu_percent": 12.0,
-      "ram_percent": 69.2,
-      "ram_used_gb": 43.992374420166016,
-      "gpu_memory_used": 4059.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:04.779475",
-      "cpu_percent": 4.7,
-      "ram_percent": 69.2,
-      "ram_used_gb": 43.9922981262207,
-      "gpu_memory_used": 4059.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:10.063292",
-      "cpu_percent": 12.4,
-      "ram_percent": 69.2,
-      "ram_used_gb": 44.004146575927734,
-      "gpu_memory_used": 4041.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:10.155395",
-      "cpu_percent": 6.8,
-      "ram_percent": 69.2,
-      "ram_used_gb": 44.004215240478516,
-      "gpu_memory_used": 4041.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:16.097887",
-      "cpu_percent": 13.1,
-      "ram_percent": 69.2,
-      "ram_used_gb": 44.0260009765625,
-      "gpu_memory_used": 4042.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:16.171478",
-      "cpu_percent": 4.5,
-      "ram_percent": 69.2,
-      "ram_used_gb": 44.02027130126953,
-      "gpu_memory_used": 4042.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:23.044945",
-      "cpu_percent": 12.6,
-      "ram_percent": 69.2,
-      "ram_used_gb": 44.03746795654297,
-      "gpu_memory_used": 4044.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:23.127442",
-      "cpu_percent": 8.3,
-      "ram_percent": 69.2,
-      "ram_used_gb": 44.0373420715332,
-      "gpu_memory_used": 4044.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:36.780309",
-      "cpu_percent": 12.5,
-      "ram_percent": 69.2,
-      "ram_used_gb": 44.00790786743164,
-      "gpu_memory_used": 4034.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:36.853474",
-      "cpu_percent": 6.2,
-      "ram_percent": 69.2,
-      "ram_used_gb": 44.00779724121094,
-      "gpu_memory_used": 4034.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:57.449274",
-      "cpu_percent": 12.4,
-      "ram_percent": 69.2,
-      "ram_used_gb": 44.0432243347168,
-      "gpu_memory_used": 4034.0
-    },
-    {
-      "timestamp": "2024-12-31T03:13:57.524592",
-      "cpu_percent": 6.2,
-      "ram_percent": 69.2,
-      "ram_used_gb": 44.03204345703125,
-      "gpu_memory_used": 4034.0
-    },
-    {
-      "timestamp": "2024-12-31T03:14:24.698822",
-      "cpu_percent": 13.4,
-      "ram_percent": 69.5,
-      "ram_used_gb": 44.18327331542969,
-      "gpu_memory_used": 4480.0
-    },
-    {
-      "timestamp": "2024-12-31T03:14:24.783683",
-      "cpu_percent": 4.2,
-      "ram_percent": 69.5,
-      "ram_used_gb": 44.182212829589844,
-      "gpu_memory_used": 4480.0
-    },
-    {
-      "timestamp": "2024-12-31T03:14:58.242642",
-      "cpu_percent": 12.8,
-      "ram_percent": 69.5,
-      "ram_used_gb": 44.20225524902344,
-      "gpu_memory_used": 4476.0
-    },
-    {
-      "timestamp": "2024-12-31T03:14:58.310907",
-      "cpu_percent": 2.9,
-      "ram_percent": 69.5,
-      "ram_used_gb": 44.19659423828125,
-      "gpu_memory_used": 4476.0
-    },
-    {
-      "timestamp": "2024-12-31T03:15:42.196813",
-      "cpu_percent": 14.3,
-      "ram_percent": 69.9,
-      "ram_used_gb": 44.43781661987305,
-      "gpu_memory_used": 4494.0
-    },
-    {
-      "timestamp": "2024-12-31T03:15:42.288427",
-      "cpu_percent": 13.7,
-      "ram_percent": 69.9,
-      "ram_used_gb": 44.439701080322266,
-      "gpu_memory_used": 4494.0
-    },
-    {
-      "timestamp": "2024-12-31T03:16:35.483849",
-      "cpu_percent": 14.7,
-      "ram_percent": 65.0,
-      "ram_used_gb": 41.35385513305664,
-      "gpu_memory_used": 4506.0
-    },
-    {
-      "timestamp": "2024-12-31T03:16:35.626628",
-      "cpu_percent": 32.9,
-      "ram_percent": 65.0,
-      "ram_used_gb": 41.34442138671875,
-      "gpu_memory_used": 4506.0
-    },
-    {
-      "timestamp": "2024-12-31T03:17:29.378353",
-      "cpu_percent": 13.4,
-      "ram_percent": 64.3,
-      "ram_used_gb": 40.8721809387207,
-      "gpu_memory_used": 4485.0
-    },
-    {
-      "timestamp": "2024-12-31T03:17:29.457464",
-      "cpu_percent": 5.1,
-      "ram_percent": 64.3,
-      "ram_used_gb": 40.875389099121094,
-      "gpu_memory_used": 4485.0
-    },
-    {
-      "timestamp": "2024-12-31T03:18:31.955862",
-      "cpu_percent": 14.3,
-      "ram_percent": 65.0,
-      "ram_used_gb": 41.360206604003906,
-      "gpu_memory_used": 4484.0
-    },
-    {
-      "timestamp": "2024-12-31T03:18:32.038999",
-      "cpu_percent": 12.5,
-      "ram_percent": 65.0,
-      "ram_used_gb": 41.37223434448242,
-      "gpu_memory_used": 4484.0
-    },
-    {
-      "timestamp": "2024-12-31T03:19:46.454105",
-      "cpu_percent": 13.9,
-      "ram_percent": 65.3,
-      "ram_used_gb": 41.562198638916016,
-      "gpu_memory_used": 4487.0
-    },
-    {
-      "timestamp": "2024-12-31T03:19:46.524303",
-      "cpu_percent": 6.8,
-      "ram_percent": 65.3,
-      "ram_used_gb": 41.56681442260742,
-      "gpu_memory_used": 4487.0
-    },
-    {
-      "timestamp": "2024-12-31T03:21:25.251452",
-      "cpu_percent": 23.7,
-      "ram_percent": 62.0,
-      "ram_used_gb": 39.456459045410156,
-      "gpu_memory_used": 4488.0
-    },
-    {
-      "timestamp": "2024-12-31T03:21:25.348643",
-      "cpu_percent": 2.9,
-      "ram_percent": 62.0,
-      "ram_used_gb": 39.454288482666016,
-      "gpu_memory_used": 4487.0
-    },
-    {
-      "timestamp": "2024-12-31T03:22:53.939896",
-      "cpu_percent": 12.9,
-      "ram_percent": 62.1,
-      "ram_used_gb": 39.50320053100586,
-      "gpu_memory_used": 4488.0
-    },
-    {
-      "timestamp": "2024-12-31T03:22:54.041607",
-      "cpu_percent": 8.3,
-      "ram_percent": 62.1,
-      "ram_used_gb": 39.49895095825195,
-      "gpu_memory_used": 4488.0
-    },
-    {
-      "timestamp": "2024-12-31T03:24:33.835432",
-      "cpu_percent": 12.9,
-      "ram_percent": 62.3,
-      "ram_used_gb": 39.647212982177734,
-      "gpu_memory_used": 4503.0
-    },
-    {
-      "timestamp": "2024-12-31T03:24:33.923914",
-      "cpu_percent": 7.6,
-      "ram_percent": 62.3,
-      "ram_used_gb": 39.64302062988281,
-      "gpu_memory_used": 4503.0
-    },
-    {
-      "timestamp": "2024-12-31T03:26:22.021598",
-      "cpu_percent": 12.9,
-      "ram_percent": 58.4,
-      "ram_used_gb": 37.162540435791016,
-      "gpu_memory_used": 4491.0
-    },
-    {
-      "timestamp": "2024-12-31T03:26:22.142138",
-      "cpu_percent": 12.0,
-      "ram_percent": 58.4,
-      "ram_used_gb": 37.162010192871094,
-      "gpu_memory_used": 4487.0
-    },
-    {
-      "timestamp": "2024-12-31T03:28:15.970365",
-      "cpu_percent": 15.0,
-      "ram_percent": 58.2,
-      "ram_used_gb": 37.04011535644531,
-      "gpu_memory_used": 4481.0
-    },
-    {
-      "timestamp": "2024-12-31T03:28:16.096459",
-      "cpu_percent": 12.4,
-      "ram_percent": 58.2,
-      "ram_used_gb": 37.035972595214844,
-      "gpu_memory_used": 4473.0
-    },
-    {
-      "timestamp": "2024-12-31T03:30:17.092257",
-      "cpu_percent": 12.4,
-      "ram_percent": 58.4,
-      "ram_used_gb": 37.14639663696289,
-      "gpu_memory_used": 4459.0
-    }
-  ]
-}
\ No newline at end of file
diff --git a/examples/benchmarks/benchmark_stats.txt b/examples/benchmarks/benchmark_stats.txt
deleted file mode 100644
index c2a9b02..0000000
--- a/examples/benchmarks/benchmark_stats.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-=== Benchmark Statistics ===
-
-Overall Stats:
-Total tokens processed: 140500
-Total audio generated: 43469.18s
-Total test duration: 1061.20s
-Average processing rate: 137.67 tokens/second
-Average realtime factor: 42.93x
-
-Per-chunk Stats:
-Average chunk size: 5620.00 tokens
-Min chunk size: 100.00 tokens
-Max chunk size: 16000.00 tokens
-Average processing time: 41.13s
-Average output length: 1738.77s
-
-Performance Ranges:
-Processing rate range: 11.70 - 155.99 tokens/second
-Realtime factor range: 3.65x - 49.46x
diff --git a/examples/benchmarks/benchmark_tts.py b/examples/benchmarks/benchmark_tts.py
deleted file mode 100644
index f17e6ee..0000000
--- a/examples/benchmarks/benchmark_tts.py
+++ /dev/null
@@ -1,406 +0,0 @@
-import os
-import json
-import time
-import subprocess
-from datetime import datetime
-
-import pandas as pd
-import psutil
-import seaborn as sns
-import requests
-import tiktoken
-import scipy.io.wavfile as wavfile
-import matplotlib.pyplot as plt
-
-enc = tiktoken.get_encoding("cl100k_base")
-
-
-def setup_plot(fig, ax, title):
-    """Configure plot styling"""
-    # Improve grid
-    ax.grid(True, linestyle="--", alpha=0.3, color="#ffffff")
-
-    # Set title and labels with better fonts
-    ax.set_title(title, pad=20, fontsize=16, fontweight="bold", color="#ffffff")
-    ax.set_xlabel(ax.get_xlabel(), fontsize=14, fontweight="medium", color="#ffffff")
-    ax.set_ylabel(ax.get_ylabel(), fontsize=14, fontweight="medium", color="#ffffff")
-
-    # Improve tick labels
-    ax.tick_params(labelsize=12, colors="#ffffff")
-
-    # Style spines
-    for spine in ax.spines.values():
-        spine.set_color("#ffffff")
-        spine.set_alpha(0.3)
-        spine.set_linewidth(0.5)
-
-    # Set background colors
-    ax.set_facecolor("#1a1a2e")
-    fig.patch.set_facecolor("#1a1a2e")
-
-    return fig, ax
-
-
-def get_text_for_tokens(text: str, num_tokens: int) -> str:
-    """Get a slice of text that contains exactly num_tokens tokens"""
-    tokens = enc.encode(text)
-    if num_tokens > len(tokens):
-        return text
-    return enc.decode(tokens[:num_tokens])
-
-
-def get_audio_length(audio_data: bytes) -> float:
-    """Get audio length in seconds from bytes data"""
-    # Save to a temporary file
-    temp_path = "examples/benchmarks/output/temp.wav"
-    os.makedirs(os.path.dirname(temp_path), exist_ok=True)
-    with open(temp_path, "wb") as f:
-        f.write(audio_data)
-
-    # Read the audio file
-    try:
-        rate, data = wavfile.read(temp_path)
-        return len(data) / rate
-    finally:
-        # Clean up temp file
-        if os.path.exists(temp_path):
-            os.remove(temp_path)
-
-
-def get_gpu_memory():
-    """Get GPU memory usage using nvidia-smi"""
-    try:
-        result = subprocess.check_output(
-            ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,nounits,noheader"]
-        )
-        return float(result.decode("utf-8").strip())
-    except (subprocess.CalledProcessError, FileNotFoundError):
-        return None
-
-
-def get_system_metrics():
-    """Get current system metrics"""
-    metrics = {
-        "timestamp": datetime.now().isoformat(),
-        "cpu_percent": psutil.cpu_percent(),
-        "ram_percent": psutil.virtual_memory().percent,
-        "ram_used_gb": psutil.virtual_memory().used / (1024**3),
-    }
-
-    gpu_mem = get_gpu_memory()
-    if gpu_mem is not None:
-        metrics["gpu_memory_used"] = gpu_mem
-
-    return metrics
-
-
-def make_tts_request(text: str, timeout: int = 120) -> tuple[float, float]:
-    """Make TTS request using OpenAI-compatible endpoint and return processing time and output length"""
-    try:
-        start_time = time.time()
-
-        # Make request to OpenAI-compatible endpoint
-        response = requests.post(
-            "http://localhost:8880/v1/audio/speech",
-            json={
-                "model": "kokoro",
-                "input": text,
-                "voice": "af",
-                "response_format": "wav",
-            },
-            timeout=timeout,
-        )
-        response.raise_for_status()
-
-        processing_time = time.time() - start_time
-        audio_length = get_audio_length(response.content)
-
-        # Save the audio file
-        token_count = len(enc.encode(text))
-        output_file = f"examples/benchmarks/output/chunk_{token_count}_tokens.wav"
-        os.makedirs(os.path.dirname(output_file), exist_ok=True)
-        with open(output_file, "wb") as f:
-            f.write(response.content)
-        print(f"Saved audio to {output_file}")
-
-        return processing_time, audio_length
-
-    except requests.exceptions.RequestException as e:
-        print(f"Error making request for text: {text[:50]}... Error: {str(e)}")
-        return None, None
-    except Exception as e:
-        print(f"Error processing text: {text[:50]}... Error: {str(e)}")
-        return None, None
-
-
-def plot_system_metrics(metrics_data):
-    """Create plots for system metrics over time"""
-    df = pd.DataFrame(metrics_data)
-    df["timestamp"] = pd.to_datetime(df["timestamp"])
-    elapsed_time = (df["timestamp"] - df["timestamp"].iloc[0]).dt.total_seconds()
-
-    # Get baseline values (first measurement)
-    baseline_cpu = df["cpu_percent"].iloc[0]
-    baseline_ram = df["ram_used_gb"].iloc[0]
-    baseline_gpu = (
-        df["gpu_memory_used"].iloc[0] / 1024
-        if "gpu_memory_used" in df.columns
-        else None
-    )  # Convert MB to GB
-
-    # Convert GPU memory to GB
-    if "gpu_memory_used" in df.columns:
-        df["gpu_memory_gb"] = df["gpu_memory_used"] / 1024
-
-    # Set plotting style
-    plt.style.use("dark_background")
-
-    # Create figure with 3 subplots (or 2 if no GPU)
-    has_gpu = "gpu_memory_used" in df.columns
-    num_plots = 3 if has_gpu else 2
-    fig, axes = plt.subplots(num_plots, 1, figsize=(15, 5 * num_plots))
-    fig.patch.set_facecolor("#1a1a2e")
-
-    # Apply rolling average for smoothing
-    window = min(5, len(df) // 2)  # Smaller window for smoother lines
-
-    # Plot 1: CPU Usage
-    smoothed_cpu = df["cpu_percent"].rolling(window=window, center=True).mean()
-    sns.lineplot(
-        x=elapsed_time, y=smoothed_cpu, ax=axes[0], color="#ff2a6d", linewidth=2
-    )
-    axes[0].axhline(
-        y=baseline_cpu, color="#05d9e8", linestyle="--", alpha=0.5, label="Baseline"
-    )
-    axes[0].set_xlabel("Time (seconds)", fontsize=14)
-    axes[0].set_ylabel("CPU Usage (%)", fontsize=14)
-    axes[0].tick_params(labelsize=12)
-    axes[0].set_title("CPU Usage Over Time", pad=20, fontsize=16, fontweight="bold")
-    axes[0].set_ylim(0, max(df["cpu_percent"]) * 1.1)  # Add 10% padding
-    axes[0].legend()
-
-    # Plot 2: RAM Usage
-    smoothed_ram = df["ram_used_gb"].rolling(window=window, center=True).mean()
-    sns.lineplot(
-        x=elapsed_time, y=smoothed_ram, ax=axes[1], color="#05d9e8", linewidth=2
-    )
-    axes[1].axhline(
-        y=baseline_ram, color="#ff2a6d", linestyle="--", alpha=0.5, label="Baseline"
-    )
-    axes[1].set_xlabel("Time (seconds)", fontsize=14)
-    axes[1].set_ylabel("RAM Usage (GB)", fontsize=14)
-    axes[1].tick_params(labelsize=12)
-    axes[1].set_title("RAM Usage Over Time", pad=20, fontsize=16, fontweight="bold")
-    axes[1].set_ylim(0, max(df["ram_used_gb"]) * 1.1)  # Add 10% padding
-    axes[1].legend()
-
-    # Plot 3: GPU Memory (if available)
-    if has_gpu:
-        smoothed_gpu = df["gpu_memory_gb"].rolling(window=window, center=True).mean()
-        sns.lineplot(
-            x=elapsed_time, y=smoothed_gpu, ax=axes[2], color="#ff2a6d", linewidth=2
-        )
-        axes[2].axhline(
-            y=baseline_gpu, color="#05d9e8", linestyle="--", alpha=0.5, label="Baseline"
-        )
-        axes[2].set_xlabel("Time (seconds)", fontsize=14)
-        axes[2].set_ylabel("GPU Memory (GB)", fontsize=14)
-        axes[2].tick_params(labelsize=12)
-        axes[2].set_title(
-            "GPU Memory Usage Over Time", pad=20, fontsize=16, fontweight="bold"
-        )
-        axes[2].set_ylim(0, max(df["gpu_memory_gb"]) * 1.1)  # Add 10% padding
-        axes[2].legend()
-
-    # Style all subplots
-    for ax in axes:
-        ax.grid(True, linestyle="--", alpha=0.3)
-        ax.set_facecolor("#1a1a2e")
-        for spine in ax.spines.values():
-            spine.set_color("#ffffff")
-            spine.set_alpha(0.3)
-
-    plt.tight_layout()
-    plt.savefig("examples/benchmarks/system_usage.png", dpi=300, bbox_inches="tight")
-    plt.close()
-
-
-def main():
-    # Create output directory
-    os.makedirs("examples/benchmarks/output", exist_ok=True)
-
-    # Read input text
-    with open(
-        "examples/benchmarks/the_time_machine_hg_wells.txt", "r", encoding="utf-8"
-    ) as f:
-        text = f.read()
-
-    # Get total tokens in file
-    total_tokens = len(enc.encode(text))
-    print(f"Total tokens in file: {total_tokens}")
-
-    # Generate token sizes with dense sampling at start and increasing intervals
-    dense_range = list(range(100, 1001, 100))
-    current = max(dense_range)
-    large_range = []
-    while current <= total_tokens:
-        large_range.append(current)
-        current += 1000
-
-    token_sizes = sorted(list(set(dense_range + large_range)))
-    print(f"Testing sizes: {token_sizes}")
-
-    # Process chunks
-    results = []
-    system_metrics = []
-    test_start_time = time.time()
-
-    for num_tokens in token_sizes:
-        # Get text slice with exact token count
-        chunk = get_text_for_tokens(text, num_tokens)
-        actual_tokens = len(enc.encode(chunk))
-
-        print(f"\nProcessing chunk with {actual_tokens} tokens:")
-        print(f"Text preview: {chunk[:100]}...")
-
-        # Collect system metrics before processing
-        system_metrics.append(get_system_metrics())
-
-        processing_time, audio_length = make_tts_request(chunk)
-        if processing_time is None or audio_length is None:
-            print("Breaking loop due to error")
-            break
-
-        # Collect system metrics after processing
-        system_metrics.append(get_system_metrics())
-
-        results.append(
-            {
-                "tokens": actual_tokens,
-                "processing_time": processing_time,
-                "output_length": audio_length,
-                "realtime_factor": audio_length / processing_time,
-                "elapsed_time": time.time() - test_start_time,
-            }
-        )
-
-        # Save intermediate results
-        with open("examples/benchmarks/benchmark_results.json", "w") as f:
-            json.dump(
-                {"results": results, "system_metrics": system_metrics}, f, indent=2
-            )
-
-    # Create DataFrame and calculate stats
-    df = pd.DataFrame(results)
-    if df.empty:
-        print("No data to plot")
-        return
-
-    # Calculate useful metrics
-    df["tokens_per_second"] = df["tokens"] / df["processing_time"]
-
-    # Write detailed stats
-    with open("examples/benchmarks/benchmark_stats.txt", "w") as f:
-        f.write("=== Benchmark Statistics ===\n\n")
-
-        f.write("Overall Stats:\n")
-        f.write(f"Total tokens processed: {df['tokens'].sum()}\n")
-        f.write(f"Total audio generated: {df['output_length'].sum():.2f}s\n")
-        f.write(f"Total test duration: {df['elapsed_time'].max():.2f}s\n")
-        f.write(
-            f"Average processing rate: {df['tokens_per_second'].mean():.2f} tokens/second\n"
-        )
-        f.write(f"Average realtime factor: {df['realtime_factor'].mean():.2f}x\n\n")
-
-        f.write("Per-chunk Stats:\n")
-        f.write(f"Average chunk size: {df['tokens'].mean():.2f} tokens\n")
-        f.write(f"Min chunk size: {df['tokens'].min():.2f} tokens\n")
-        f.write(f"Max chunk size: {df['tokens'].max():.2f} tokens\n")
-        f.write(f"Average processing time: {df['processing_time'].mean():.2f}s\n")
-        f.write(f"Average output length: {df['output_length'].mean():.2f}s\n\n")
-
-        f.write("Performance Ranges:\n")
-        f.write(
-            f"Processing rate range: {df['tokens_per_second'].min():.2f} - {df['tokens_per_second'].max():.2f} tokens/second\n"
-        )
-        f.write(
-            f"Realtime factor range: {df['realtime_factor'].min():.2f}x - {df['realtime_factor'].max():.2f}x\n"
-        )
-
-    # Set plotting style
-    plt.style.use("dark_background")
-
-    # Plot 1: Processing Time vs Token Count
-    fig, ax = plt.subplots(figsize=(12, 8))
-    sns.scatterplot(
-        data=df, x="tokens", y="processing_time", s=100, alpha=0.6, color="#ff2a6d"
-    )
-    sns.regplot(
-        data=df,
-        x="tokens",
-        y="processing_time",
-        scatter=False,
-        color="#05d9e8",
-        line_kws={"linewidth": 2},
-    )
-    corr = df["tokens"].corr(df["processing_time"])
-    plt.text(
-        0.05,
-        0.95,
-        f"Correlation: {corr:.2f}",
-        transform=ax.transAxes,
-        fontsize=10,
-        color="#ffffff",
-        bbox=dict(facecolor="#1a1a2e", edgecolor="#ffffff", alpha=0.7),
-    )
-    setup_plot(fig, ax, "Processing Time vs Input Size")
-    ax.set_xlabel("Number of Input Tokens")
-    ax.set_ylabel("Processing Time (seconds)")
-    plt.savefig("examples/benchmarks/processing_time.png", dpi=300, bbox_inches="tight")
-    plt.close()
-
-    # Plot 2: Realtime Factor vs Token Count
-    fig, ax = plt.subplots(figsize=(12, 8))
-    sns.scatterplot(
-        data=df, x="tokens", y="realtime_factor", s=100, alpha=0.6, color="#ff2a6d"
-    )
-    sns.regplot(
-        data=df,
-        x="tokens",
-        y="realtime_factor",
-        scatter=False,
-        color="#05d9e8",
-        line_kws={"linewidth": 2},
-    )
-    corr = df["tokens"].corr(df["realtime_factor"])
-    plt.text(
-        0.05,
-        0.95,
-        f"Correlation: {corr:.2f}",
-        transform=ax.transAxes,
-        fontsize=10,
-        color="#ffffff",
-        bbox=dict(facecolor="#1a1a2e", edgecolor="#ffffff", alpha=0.7),
-    )
-    setup_plot(fig, ax, "Realtime Factor vs Input Size")
-    ax.set_xlabel("Number of Input Tokens")
-    ax.set_ylabel("Realtime Factor (output length / processing time)")
-    plt.savefig("examples/benchmarks/realtime_factor.png", dpi=300, bbox_inches="tight")
-    plt.close()
-
-    # Plot system metrics
-    plot_system_metrics(system_metrics)
-
-    print("\nResults saved to:")
-    print("- examples/benchmarks/benchmark_results.json")
-    print("- examples/benchmarks/benchmark_stats.txt")
-    print("- examples/benchmarks/processing_time.png")
-    print("- examples/benchmarks/realtime_factor.png")
-    print("- examples/benchmarks/system_usage.png")
-    if any("gpu_memory_used" in m for m in system_metrics):
-        print("- examples/benchmarks/gpu_usage.png")
-    print("\nAudio files saved in examples/benchmarks/output/")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/benchmarks/processing_time.png b/examples/benchmarks/processing_time.png
deleted file mode 100644
index c66fcaf..0000000
Binary files a/examples/benchmarks/processing_time.png and /dev/null differ
diff --git a/examples/benchmarks/realtime_factor.png b/examples/benchmarks/realtime_factor.png
deleted file mode 100644
index 249685b..0000000
Binary files a/examples/benchmarks/realtime_factor.png and /dev/null differ
diff --git a/examples/benchmarks/system_usage.png b/examples/benchmarks/system_usage.png
deleted file mode 100644
index bc10eb0..0000000
Binary files a/examples/benchmarks/system_usage.png and /dev/null differ