diff --git a/.coverage b/.coverage
deleted file mode 100644
index 42652cc..0000000
Binary files a/.coverage and /dev/null differ
diff --git a/README.md b/README.md
index 2e38852..8f83b33 100644
--- a/README.md
+++ b/README.md
@@ -129,7 +129,7 @@ response = requests.post(
)
```
-
+
@@ -144,7 +144,7 @@ response = requests.post(
- pcm
-
+
@@ -175,8 +175,8 @@ Benchmarking was performed on generation via the local API using text lengths up
- H.G. Wells - The Time Machine (full text)
-
-
+
+
Key Performance Metrics:
diff --git a/api/src/core/config.py b/api/src/core/config.py
index 5348730..ad0ef1c 100644
--- a/api/src/core/config.py
+++ b/api/src/core/config.py
@@ -18,6 +18,8 @@ class Settings(BaseSettings):
onnx_model_path: str = "kokoro-v0_19.onnx"
voices_dir: str = "voices"
sample_rate: int = 24000
+ max_chunk_size: int = 300 # Maximum size of text chunks for processing
+ gap_trim_ms: int = 250 # Amount to trim from streaming chunk ends in milliseconds
# ONNX Optimization Settings
onnx_num_threads: int = 4 # Number of threads for intra-op parallelism
diff --git a/api/src/core/don_quixote.txt b/api/src/core/don_quixote.txt
new file mode 100644
index 0000000..a32a3a6
--- /dev/null
+++ b/api/src/core/don_quixote.txt
@@ -0,0 +1,9 @@
+In a village of La Mancha, the name of which I have no desire to call
+to mind, there lived not long since one of those gentlemen that keep a
+lance in the lance-rack, an old buckler, a lean hack, and a greyhound
+for coursing. An olla of rather more beef than mutton, a salad on most
+nights, scraps on Saturdays, lentils on Fridays, and a pigeon or so
+extra on Sundays, made away with three-quarters of his income. The rest
+of it went in a doublet of fine cloth and velvet breeches and shoes to
+match for holidays, while on week-days he made a brave figure in his
+best homespun.
\ No newline at end of file
diff --git a/api/src/main.py b/api/src/main.py
index 4603e1f..fc51043 100644
--- a/api/src/main.py
+++ b/api/src/main.py
@@ -22,10 +22,11 @@ async def lifespan(app: FastAPI):
logger.info("Loading TTS model and voice packs...")
# Initialize the main model with warm-up
- voicepack_count = TTSModel.setup()
+ voicepack_count = await TTSModel.setup()
# boundary = "█████╗"*9
- boundary = "░" * 30
+ boundary = "░" * 24
startup_msg =f"""
+
{boundary}
╔═╗┌─┐┌─┐┌┬┐
@@ -37,8 +38,9 @@ async def lifespan(app: FastAPI):
{boundary}
"""
- startup_msg += f"\nModel loaded and warmed up on {TTSModel.get_device()}"
- startup_msg += f"\n{voicepack_count} voice packs loaded successfully\n"
+ # TODO: Improve CPU warmup, threads, memory, etc
+ startup_msg += f"\nModel warmed up on {TTSModel.get_device()}"
+ startup_msg += f"\n{voicepack_count} voice packs loaded\n"
startup_msg += f"\n{boundary}\n"
logger.info(startup_msg)
diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py
index 5d465f5..2b30c7a 100644
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@@ -83,8 +83,8 @@ async def create_speech(
audio,
24000,
request.response_format,
- is_first_chunk=True
- )
+ is_first_chunk=True,
+ stream=False)
return Response(
content=content,
diff --git a/api/src/services/audio.py b/api/src/services/audio.py
index e13d91f..dcb2a72 100644
--- a/api/src/services/audio.py
+++ b/api/src/services/audio.py
@@ -4,22 +4,30 @@ from io import BytesIO
import numpy as np
import soundfile as sf
+import scipy.io.wavfile as wavfile
from loguru import logger
-
+from ..core.config import settings
class AudioNormalizer:
"""Handles audio normalization state for a single stream"""
def __init__(self):
self.int16_max = np.iinfo(np.int16).max
+ self.chunk_trim_ms = settings.gap_trim_ms
+ self.sample_rate = 24000 # Sample rate of the audio
+ self.samples_to_trim = int(self.chunk_trim_ms * self.sample_rate / 1000)
- def normalize(self, audio_data: np.ndarray) -> np.ndarray:
- """Normalize audio data to int16 range"""
+ def normalize(self, audio_data: np.ndarray, is_last_chunk: bool = False) -> np.ndarray:
+ """Normalize audio data to int16 range and trim chunk boundaries"""
# Convert to float32 if not already
audio_float = audio_data.astype(np.float32)
# Normalize to [-1, 1] range first
if np.max(np.abs(audio_float)) > 0:
audio_float = audio_float / np.max(np.abs(audio_float))
+
+ # Trim end of non-final chunks to reduce gaps
+ if not is_last_chunk and len(audio_float) > self.samples_to_trim:
+ audio_float = audio_float[:-self.samples_to_trim]
# Scale to int16 range
return (audio_float * self.int16_max).astype(np.int16)
@@ -27,13 +35,30 @@ class AudioNormalizer:
class AudioService:
"""Service for audio format conversions"""
+ # Default audio format settings: minimum compression level (0.0), favouring encode speed
+ DEFAULT_SETTINGS = {
+ "mp3": {
+ "bitrate_mode": "CONSTANT", # Faster than variable bitrate
+ "compression_level": 0.0, # 0.0 = minimum compression level (fastest encode)
+ },
+ "opus": {
+ "compression_level": 0.0, # 0.0 = minimum compression level (fastest encode)
+ },
+ "flac": {
+ "compression_level": 0.0, # 0.0 = minimum compression level (fastest encode)
+ }
+ }
+
@staticmethod
def convert_audio(
audio_data: np.ndarray,
sample_rate: int,
output_format: str,
is_first_chunk: bool = True,
- normalizer: AudioNormalizer = None
+ is_last_chunk: bool = False,
+ normalizer: AudioNormalizer = None,
+ format_settings: dict = None,
+ stream: bool = True
) -> bytes:
"""Convert audio data to specified format
@@ -42,6 +67,19 @@ class AudioService:
sample_rate: Sample rate of the audio
output_format: Target format (wav, mp3, opus, flac, pcm)
is_first_chunk: Whether this is the first chunk of a stream
+ normalizer: Optional AudioNormalizer instance for consistent normalization across chunks
+ format_settings: Optional dict of format-specific settings to override defaults
+ Example: {
+ "mp3": {
+ "bitrate_mode": "VARIABLE",
+ "compression_level": 0.8
+ }
+ }
+ Default settings favour encoding speed over output size
+ (suited to localhost use); compression_level is 0.0 throughout:
+ - MP3: constant bitrate, minimum compression level (0.0)
+ - OPUS: minimum compression level (0.0)
+ - FLAC: minimum compression level (0.0)
Returns:
Bytes of the converted audio
@@ -50,31 +88,48 @@ class AudioService:
try:
# Always normalize audio to ensure proper amplitude scaling
- if normalizer is None:
- normalizer = AudioNormalizer()
- normalized_audio = normalizer.normalize(audio_data)
+ if stream:
+ if normalizer is None:
+ normalizer = AudioNormalizer()
+ normalized_audio = normalizer.normalize(audio_data, is_last_chunk=is_last_chunk)
+ else:
+ normalized_audio = audio_data
if output_format == "pcm":
- logger.info("Writing PCM data...")
# Raw 16-bit PCM samples, no header
buffer.write(normalized_audio.tobytes())
elif output_format == "wav":
- logger.info("Writing to WAV format...")
- # Always include WAV header for WAV format
- sf.write(buffer, normalized_audio, sample_rate, format="WAV", subtype='PCM_16')
+ if stream:
+ # Use soundfile for streaming to ensure proper headers
+ sf.write(buffer, normalized_audio, sample_rate, format="WAV", subtype='PCM_16')
+ else:
+ # Non-streaming WAV: use scipy.io.wavfile, which seemed faster than
+ # soundfile here. It still writes a WAV header, but stores the samples
+ # in the array's own dtype instead of re-encoding them to PCM_16.
+ wavfile.write(buffer, sample_rate, normalized_audio)
elif output_format == "mp3":
- logger.info("Converting to MP3 format...")
- # Use lower bitrate for streaming
- sf.write(buffer, normalized_audio, sample_rate, format="MP3")
+ # Use format settings or defaults
+ settings = format_settings.get("mp3", {}) if format_settings else {}
+ settings = {**AudioService.DEFAULT_SETTINGS["mp3"], **settings}
+ sf.write(
+ buffer, normalized_audio,
+ sample_rate, format="MP3",
+ **settings
+ )
+
elif output_format == "opus":
- logger.info("Converting to Opus format...")
- # Use lower bitrate and smaller frame size for streaming
- sf.write(buffer, normalized_audio, sample_rate, format="OGG", subtype="OPUS")
+ settings = format_settings.get("opus", {}) if format_settings else {}
+ settings = {**AudioService.DEFAULT_SETTINGS["opus"], **settings}
+ sf.write(buffer, normalized_audio, sample_rate, format="OGG",
+ subtype="OPUS", **settings)
+
elif output_format == "flac":
- logger.info("Converting to FLAC format...")
- # Use smaller block size for streaming
+ if is_first_chunk:
+ logger.info("Starting FLAC stream...")
+ settings = format_settings.get("flac", {}) if format_settings else {}
+ settings = {**AudioService.DEFAULT_SETTINGS["flac"], **settings}
sf.write(buffer, normalized_audio, sample_rate, format="FLAC",
- subtype='PCM_16')
+ subtype='PCM_16', **settings)
else:
if output_format == "aac":
raise ValueError(
diff --git a/api/src/services/text_processing/chunker.py b/api/src/services/text_processing/chunker.py
new file mode 100644
index 0000000..c0c59eb
--- /dev/null
+++ b/api/src/services/text_processing/chunker.py
@@ -0,0 +1,52 @@
+"""Text chunking service"""
+
+import re
+from ...core.config import settings
+
+
+def split_text(text: str, max_chunk=None):
+ """Split text into chunks on natural pause points
+
+ Args:
+ text: Text to split into chunks
+ max_chunk: Maximum chunk size (defaults to settings.max_chunk_size)
+ """
+ if max_chunk is None:
+ max_chunk = settings.max_chunk_size
+
+ if not isinstance(text, str):
+ text = str(text) if text is not None else ""
+
+ text = text.strip()
+ if not text:
+ return
+
+ # First split into sentences
+ sentences = re.split(r"(?<=[.!?])\s+", text)
+
+ for sentence in sentences:
+ sentence = sentence.strip()
+ if not sentence:
+ continue
+
+ # Sentences longer than max_chunk are split further on punctuation
+ if len(sentence) > max_chunk: # Lower threshold for more consistent sizes
+ # First try splitting on semicolons and colons
+ parts = re.split(r"(?<=[;:])\s+", sentence)
+
+ for part in parts:
+ part = part.strip()
+ if not part:
+ continue
+
+ # If part is still long, split on commas
+ if len(part) > max_chunk:
+ subparts = re.split(r"(?<=,)\s+", part)
+ for subpart in subparts:
+ subpart = subpart.strip()
+ if subpart:
+ yield subpart
+ else:
+ yield part
+ else:
+ yield sentence
diff --git a/api/src/services/tts_base.py b/api/src/services/tts_base.py
index f502373..16e8462 100644
--- a/api/src/services/tts_base.py
+++ b/api/src/services/tts_base.py
@@ -15,7 +15,7 @@ class TTSBaseModel(ABC):
VOICES_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "voices")
@classmethod
- def setup(cls):
+ async def setup(cls):
"""Initialize model and setup voices"""
with cls._lock:
# Set device
@@ -59,19 +59,23 @@ class TTSBaseModel(ABC):
except Exception as e:
logger.error(f"Error copying voice {voice_name}: {str(e)}")
- # Warm up with default voice
+ # Load warmup text
try:
- dummy_text = "Hello"
- voice_path = os.path.join(cls.VOICES_DIR, "af.pt")
- dummy_voicepack = torch.load(voice_path, map_location=cls._device, weights_only=True)
-
- # Process text and generate audio
- phonemes, tokens = cls.process_text(dummy_text, "a")
- cls.generate_from_tokens(tokens, dummy_voicepack, 1.0)
-
- logger.info("Model warm-up complete")
+ with open(os.path.join(os.path.dirname(os.path.dirname(__file__)), "core", "don_quixote.txt")) as f:
+ warmup_text = f.read()
except Exception as e:
- logger.warning(f"Model warm-up failed: {e}")
+ logger.warning(f"Failed to load warmup text: {e}")
+ warmup_text = "This is a warmup text that will be split into chunks for processing."
+
+ # Use warmup service
+ from .warmup import WarmupService
+ warmup = WarmupService()
+
+ # Load and warm up voices
+ loaded_voices = warmup.load_voices()
+ await warmup.warmup_voices(warmup_text, loaded_voices)
+
+ logger.info("Model warm-up complete")
# Count voices in directory
voice_count = len([f for f in os.listdir(cls.VOICES_DIR) if f.endswith(".pt")])
diff --git a/api/src/services/tts_gpu.py b/api/src/services/tts_gpu.py
index 300d141..51c8424 100644
--- a/api/src/services/tts_gpu.py
+++ b/api/src/services/tts_gpu.py
@@ -1,6 +1,7 @@
import os
import numpy as np
import torch
+import time
from loguru import logger
from models import build_model
from .text_processing import phonemize, tokenize
@@ -8,42 +9,97 @@ from .text_processing import phonemize, tokenize
from .tts_base import TTSBaseModel
from ..core.config import settings
+# @torch.no_grad()
+# def forward(model, tokens, ref_s, speed):
+# """Forward pass through the model"""
+# device = ref_s.device
+# tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
+# input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
+# text_mask = length_to_mask(input_lengths).to(device)
+# bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
+# d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
+# s = ref_s[:, 128:]
+# d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
+# x, _ = model.predictor.lstm(d)
+# duration = model.predictor.duration_proj(x)
+# duration = torch.sigmoid(duration).sum(axis=-1) / speed
+# pred_dur = torch.round(duration).clamp(min=1).long()
+# pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item())
+# c_frame = 0
+# for i in range(pred_aln_trg.size(0)):
+# pred_aln_trg[i, c_frame : c_frame + pred_dur[0, i].item()] = 1
+# c_frame += pred_dur[0, i].item()
+# en = d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device)
+# F0_pred, N_pred = model.predictor.F0Ntrain(en, s)
+# t_en = model.text_encoder(tokens, input_lengths, text_mask)
+# asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
+# return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
@torch.no_grad()
def forward(model, tokens, ref_s, speed):
- """Forward pass through the model"""
+ """Forward pass through the model with light optimizations that preserve output quality"""
device = ref_s.device
+
+ # Keep original token handling but optimize device placement
tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
text_mask = length_to_mask(input_lengths).to(device)
+
+ # BERT and encoder pass
bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
- s = ref_s[:, 128:]
- d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
+
+ # Split reference signal once for efficiency
+ s_content = ref_s[:, 128:]
+ s_ref = ref_s[:, :128]
+
+ # Predictor forward pass
+ d = model.predictor.text_encoder(d_en, s_content, input_lengths, text_mask)
x, _ = model.predictor.lstm(d)
+
+ # Duration prediction - keeping original logic
duration = model.predictor.duration_proj(x)
duration = torch.sigmoid(duration).sum(axis=-1) / speed
pred_dur = torch.round(duration).clamp(min=1).long()
- pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item())
+
+ # Alignment matrix construction - keeping original approach for quality
+ pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item(), device=device)
c_frame = 0
for i in range(pred_aln_trg.size(0)):
- pred_aln_trg[i, c_frame : c_frame + pred_dur[0, i].item()] = 1
+ pred_aln_trg[i, c_frame:c_frame + pred_dur[0, i].item()] = 1
c_frame += pred_dur[0, i].item()
- en = d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device)
- F0_pred, N_pred = model.predictor.F0Ntrain(en, s)
+
+ # Matrix multiplications - reuse unsqueezed tensor
+ pred_aln_trg = pred_aln_trg.unsqueeze(0) # Do unsqueeze once
+ en = d.transpose(-1, -2) @ pred_aln_trg
+ F0_pred, N_pred = model.predictor.F0Ntrain(en, s_content)
+
+ # Text encoding and final decoding
t_en = model.text_encoder(tokens, input_lengths, text_mask)
- asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
- return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
+ asr = t_en @ pred_aln_trg
+
+ return model.decoder(asr, F0_pred, N_pred, s_ref).squeeze().cpu().numpy()
+
+# def length_to_mask(lengths):
+# """Create attention mask from lengths"""
+# mask = (
+# torch.arange(lengths.max())
+# .unsqueeze(0)
+# .expand(lengths.shape[0], -1)
+# .type_as(lengths)
+# )
+# mask = torch.gt(mask + 1, lengths.unsqueeze(1))
+# return mask
def length_to_mask(lengths):
- """Create attention mask from lengths"""
- mask = (
- torch.arange(lengths.max())
- .unsqueeze(0)
- .expand(lengths.shape[0], -1)
- .type_as(lengths)
- )
- mask = torch.gt(mask + 1, lengths.unsqueeze(1))
- return mask
+ """Create attention mask from lengths - possibly optimized version"""
+ max_len = lengths.max()
+ # Create mask directly on the same device as lengths
+ mask = torch.arange(max_len, device=lengths.device)[None, :].expand(lengths.shape[0], -1)
+ # Match the dtype of lengths (replaces type_as); skipped when the dtypes already agree
+ if lengths.dtype != mask.dtype:
+ mask = mask.to(dtype=lengths.dtype)
+ # Fuse operations using broadcasting
+ return mask + 1 > lengths[:, None]
class TTSGPUModel(TTSBaseModel):
_instance = None
diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py
index fbd20e6..8e2aed6 100644
--- a/api/src/services/tts_service.py
+++ b/api/src/services/tts_service.py
@@ -8,7 +8,7 @@ from functools import lru_cache
import numpy as np
import torch
import scipy.io.wavfile as wavfile
-from .text_processing import normalize_text
+from .text_processing import normalize_text, chunker
from loguru import logger
from ..core.config import settings
@@ -20,40 +20,6 @@ class TTSService:
def __init__(self, output_dir: str = None):
self.output_dir = output_dir
- def _split_text(self, text: str):
- """Generate text chunks one at a time, splitting on natural pause points"""
- if not isinstance(text, str):
- text = str(text) if text is not None else ""
-
- # First split into sentences
- sentences = re.split(r"(?<=[.!?])\s+", text)
-
- for sentence in sentences:
- sentence = sentence.strip()
- if not sentence:
- continue
-
- # For longer sentences, split on commas and semicolons
- if len(sentence) > 300: # Only split long sentences
- # Split on pause points while preserving the punctuation
- chunks = re.split(r"((?<=[,;])\s+)", sentence)
-
- # Reassemble chunks with their trailing punctuation
- current_chunk = ""
- for i, chunk in enumerate(chunks):
- if i % 2 == 0: # Text chunk
- current_chunk += chunk
- else: # Punctuation/whitespace chunk
- current_chunk += chunk
- if current_chunk.strip():
- yield current_chunk.strip()
- current_chunk = ""
-
- # Yield any remaining text
- if current_chunk.strip():
- yield current_chunk.strip()
- else:
- yield sentence
@staticmethod
@lru_cache(maxsize=20) # Cache up to 8 most recently used voices
@@ -96,28 +62,32 @@ class TTSService:
# Load voice using cached loader
voicepack = self._load_voice(voice_path)
- # Generate audio with or without stitching
+ # For non-streaming, preprocess all chunks first
if stitch_long_output:
- audio_chunks = []
- chunk_count = 0
-
- # Process chunks as they're generated
- for chunk in self._split_text(text):
+ # Preprocess all chunks to phonemes/tokens
+ chunks_data = []
+ for chunk in chunker.split_text(text):
try:
- # Process text and generate audio
phonemes, tokens = TTSModel.process_text(chunk, voice[0])
+ chunks_data.append((chunk, tokens))
+ except Exception as e:
+ logger.error(f"Failed to process chunk: '{chunk}'. Error: {str(e)}")
+ continue
+
+ if not chunks_data:
+ raise ValueError("No chunks were processed successfully")
+
+ # Generate audio for all chunks
+ audio_chunks = []
+ for chunk, tokens in chunks_data:
+ try:
chunk_audio = TTSModel.generate_from_tokens(tokens, voicepack, speed)
-
if chunk_audio is not None:
audio_chunks.append(chunk_audio)
- chunk_count += 1
else:
- logger.error(f"No audio generated for chunk {chunk_count + 1}")
-
+ logger.error(f"No audio generated for chunk: '{chunk}'")
except Exception as e:
- logger.error(
- f"Failed to generate audio for chunk {chunk_count + 1}: '{chunk}'. Error: {str(e)}"
- )
+ logger.error(f"Failed to generate audio for chunk: '{chunk}'. Error: {str(e)}")
continue
if not audio_chunks:
@@ -138,53 +108,93 @@ class TTSService:
raise
async def generate_audio_stream(
- self, text: str, voice: str, speed: float, output_format: str = "wav"
+ self, text: str, voice: str, speed: float, output_format: str = "wav", silent=False
):
"""Generate and yield audio chunks as they're generated for real-time streaming"""
try:
+ stream_start = time.time()
# Create normalizer for consistent audio levels
stream_normalizer = AudioNormalizer()
# Input validation and preprocessing
if not text:
raise ValueError("Text is empty")
+ preprocess_start = time.time()
normalized = normalize_text(text)
if not normalized:
raise ValueError("Text is empty after preprocessing")
text = str(normalized)
+ logger.debug(f"Text preprocessing took: {(time.time() - preprocess_start)*1000:.1f}ms")
# Voice validation and loading
+ voice_start = time.time()
voice_path = self._get_voice_path(voice)
if not voice_path:
raise ValueError(f"Voice not found: {voice}")
voicepack = self._load_voice(voice_path)
+ logger.debug(f"Voice loading took: {(time.time() - voice_start)*1000:.1f}ms")
# Process chunks as they're generated
is_first = True
- for chunk in self._split_text(text):
+ chunks_processed = 0
+ # last_chunk_end = time.time()
+
+ # Process chunks as they come from generator
+ chunk_gen = chunker.split_text(text)
+ current_chunk = next(chunk_gen, None)
+
+ while current_chunk is not None:
+ next_chunk = next(chunk_gen, None) # Peek at next chunk
+ # chunk_start = time.time()
+ chunks_processed += 1
try:
# Process text and generate audio
- phonemes, tokens = TTSModel.process_text(chunk, voice[0])
+ # text_process_start = time.time()
+ phonemes, tokens = TTSModel.process_text(current_chunk, voice[0])
+ # text_process_time = time.time() - text_process_start
+
+ # audio_gen_start = time.time()
chunk_audio = TTSModel.generate_from_tokens(tokens, voicepack, speed)
-
+ # audio_gen_time = time.time() - audio_gen_start
+
if chunk_audio is not None:
# Convert chunk with proper header handling
+ convert_start = time.time()
chunk_bytes = AudioService.convert_audio(
chunk_audio,
24000,
output_format,
is_first_chunk=is_first,
- normalizer=stream_normalizer
+ normalizer=stream_normalizer,
+ is_last_chunk=(next_chunk is None) # Last if no next chunk
)
+ # convert_time = time.time() - convert_start
+
+ # Calculate gap from last chunk
+ # gap_time = chunk_start - last_chunk_end
+
+ # Log timing details if not silent
+ # if not silent:
+ # logger.debug(
+ # f"\nChunk {chunks_processed} timing:"
+ # f"\n Gap from last chunk: {gap_time*1000:.1f}ms"
+ # f"\n Text processing: {text_process_time*1000:.1f}ms"
+ # f"\n Audio generation: {audio_gen_time*1000:.1f}ms"
+ # f"\n Audio conversion: {convert_time*1000:.1f}ms"
+ # f"\n Total chunk time: {(time.time() - chunk_start)*1000:.1f}ms"
+ # )
+
yield chunk_bytes
is_first = False
+ # last_chunk_end = time.time()
else:
- logger.error(f"No audio generated for chunk: '{chunk}'")
+ logger.error(f"No audio generated for chunk: '{current_chunk}'")
except Exception as e:
- logger.error(f"Failed to generate audio for chunk: '{chunk}'. Error: {str(e)}")
- continue
-
+ logger.error(f"Failed to generate audio for chunk: '{current_chunk}'. Error: {str(e)}")
+
+ current_chunk = next_chunk # Move to next chunk
+
except Exception as e:
logger.error(f"Error in audio generation stream: {str(e)}")
raise
diff --git a/api/src/services/warmup.py b/api/src/services/warmup.py
new file mode 100644
index 0000000..67937dd
--- /dev/null
+++ b/api/src/services/warmup.py
@@ -0,0 +1,52 @@
+import os
+from typing import List, Tuple
+import torch
+from loguru import logger
+
+from .tts_service import TTSService
+from .tts_model import TTSModel
+
+
+class WarmupService:
+ """Service for warming up TTS models and voice caches"""
+
+ def __init__(self):
+ self.tts_service = TTSService()
+
+ def load_voices(self) -> List[Tuple[str, torch.Tensor]]:
+ """Load and cache voices up to LRU limit"""
+ # Get all voices sorted by filename length (shorter names first, usually base voices)
+ voice_files = sorted(
+ [f for f in os.listdir(TTSModel.VOICES_DIR) if f.endswith(".pt")],
+ key=len
+ )
+
+ # Load up to LRU cache limit (20)
+ loaded_voices = []
+ for voice_file in voice_files[:20]:
+ try:
+ voice_path = os.path.join(TTSModel.VOICES_DIR, voice_file)
+ voicepack = torch.load(voice_path, map_location=TTSModel.get_device(), weights_only=True)
+ loaded_voices.append((voice_file[:-3], voicepack)) # Store name and tensor
+ # logger.info(f"Loaded voice {voice_file[:-3]} into cache")
+ except Exception as e:
+ logger.error(f"Failed to load voice {voice_file}: {e}")
+ logger.info(f"Pre-loaded {len(loaded_voices)} voices into cache")
+ return loaded_voices
+
+ async def warmup_voices(self, warmup_text: str, loaded_voices: List[Tuple[str, torch.Tensor]]):
+ """Warm up voice inference and streaming"""
+ n_warmups = 1
+ for voice_name, _ in loaded_voices[:n_warmups]:
+ try:
+ logger.info(f"Running warmup inference on voice {voice_name}")
+ async for _ in self.tts_service.generate_audio_stream(
+ warmup_text,
+ voice_name,
+ 1.0,
+ "pcm"
+ ):
+ pass # Process all chunks to properly warm up
+ logger.info(f"Completed warmup for voice {voice_name}")
+ except Exception as e:
+ logger.warning(f"Warmup failed for voice {voice_name}: {e}")
diff --git a/api/tests/test_chunker.py b/api/tests/test_chunker.py
new file mode 100644
index 0000000..ed598c0
--- /dev/null
+++ b/api/tests/test_chunker.py
@@ -0,0 +1,35 @@
+"""Tests for text chunking service"""
+
+import pytest
+from api.src.services.text_processing import chunker
+
+
+def test_split_text():
+ """Test text splitting into sentences"""
+ text = "First sentence. Second sentence! Third sentence?"
+ sentences = list(chunker.split_text(text))
+ assert len(sentences) == 3
+ assert sentences[0] == "First sentence."
+ assert sentences[1] == "Second sentence!"
+ assert sentences[2] == "Third sentence?"
+
+
+def test_split_text_empty():
+ """Test splitting empty text"""
+ assert list(chunker.split_text("")) == []
+
+
+def test_split_text_single_sentence():
+ """Test splitting single sentence"""
+ text = "Just one sentence."
+ assert list(chunker.split_text(text)) == ["Just one sentence."]
+
+
+def test_split_text_with_custom_chunk_size():
+ """Test splitting with custom max chunk size"""
+ text = "First part, second part, third part."
+ chunks = list(chunker.split_text(text, max_chunk=15))
+ assert len(chunks) == 3
+ assert chunks[0] == "First part,"
+ assert chunks[1] == "second part,"
+ assert chunks[2] == "third part."
diff --git a/api/tests/test_endpoints.py b/api/tests/test_endpoints.py
index 6142e12..4827adc 100644
--- a/api/tests/test_endpoints.py
+++ b/api/tests/test_endpoints.py
@@ -1,7 +1,8 @@
-from unittest.mock import Mock
+from unittest.mock import Mock, AsyncMock
import pytest
import pytest_asyncio
+import asyncio
from fastapi.testclient import TestClient
from httpx import AsyncClient
@@ -22,6 +23,12 @@ async def async_client():
def mock_tts_service(monkeypatch):
mock_service = Mock()
mock_service._generate_audio.return_value = (bytes([0, 1, 2, 3]), 1.0)
+
+ # Create proper async generator mock
+ async def mock_stream(*args, **kwargs):
+ for chunk in [b"chunk1", b"chunk2"]:
+ yield chunk
+ mock_service.generate_audio_stream = mock_stream
mock_service.list_voices.return_value = [
"af",
"bm_lewis",
@@ -65,6 +72,7 @@ def test_openai_speech_endpoint(mock_tts_service, mock_audio_service):
"voice": "bm_lewis",
"response_format": "wav",
"speed": 1.0,
+ "stream": False # Explicitly disable streaming
}
response = client.post("/v1/audio/speech", json=test_request)
assert response.status_code == 200
@@ -84,6 +92,7 @@ def test_openai_speech_invalid_voice(mock_tts_service):
"voice": "invalid_voice",
"response_format": "wav",
"speed": 1.0,
+ "stream": False # Explicitly disable streaming
}
response = client.post("/v1/audio/speech", json=test_request)
assert response.status_code == 400 # Bad request
@@ -98,6 +107,7 @@ def test_openai_speech_invalid_speed(mock_tts_service):
"voice": "af",
"response_format": "wav",
"speed": -1.0, # Invalid speed
+ "stream": False # Explicitly disable streaming
}
response = client.post("/v1/audio/speech", json=test_request)
assert response.status_code == 422 # Validation error
@@ -112,6 +122,7 @@ def test_openai_speech_generation_error(mock_tts_service):
"voice": "af",
"response_format": "wav",
"speed": 1.0,
+ "stream": False # Explicitly disable streaming
}
response = client.post("/v1/audio/speech", json=test_request)
assert response.status_code == 500
@@ -171,13 +182,14 @@ async def test_openai_speech_pcm_streaming(mock_tts_service, async_client):
"input": "Hello world",
"voice": "af",
"response_format": "pcm",
+ "stream": True
}
- # Mock streaming response
- async def mock_stream():
- yield b"chunk1"
- yield b"chunk2"
- mock_tts_service.generate_audio_stream.return_value = mock_stream()
+ # Create streaming mock for this test
+ async def mock_stream(*args, **kwargs):
+ for chunk in [b"chunk1", b"chunk2"]:
+ yield chunk
+ mock_tts_service.generate_audio_stream = mock_stream
# Add streaming header
headers = {"x-raw-response": "stream"}
@@ -198,13 +210,14 @@ async def test_openai_speech_streaming_mp3(mock_tts_service, async_client):
"input": "Hello world",
"voice": "af",
"response_format": "mp3",
+ "stream": True
}
- # Mock streaming response
- async def mock_stream():
- yield b"mp3header"
- yield b"mp3data"
- mock_tts_service.generate_audio_stream.return_value = mock_stream()
+ # Create streaming mock for this test
+ async def mock_stream(*args, **kwargs):
+ for chunk in [b"mp3header", b"mp3data"]:
+ yield chunk
+ mock_tts_service.generate_audio_stream = mock_stream
# Add streaming header
headers = {"x-raw-response": "stream"}
@@ -227,14 +240,14 @@ async def test_openai_speech_streaming_generator(mock_tts_service, async_client)
"input": "Hello world",
"voice": "af",
"response_format": "pcm",
+ "stream": True
}
- # Mock streaming response
- async def mock_stream():
- yield b"chunk1"
- yield b"chunk2"
-
- mock_tts_service.generate_audio_stream.return_value = mock_stream()
+ # Create streaming mock for this test
+ async def mock_stream(*args, **kwargs):
+ for chunk in [b"chunk1", b"chunk2"]:
+ yield chunk
+ mock_tts_service.generate_audio_stream = mock_stream
# Add streaming header
headers = {"x-raw-response": "stream"}
diff --git a/api/tests/test_main.py b/api/tests/test_main.py
index 51026c5..cb7aa8b 100644
--- a/api/tests/test_main.py
+++ b/api/tests/test_main.py
@@ -28,29 +28,34 @@ async def test_lifespan_successful_warmup(mock_logger, mock_tts_model):
"""Test successful model warmup in lifespan"""
# Mock file system for voice counting
mock_tts_model.VOICES_DIR = "/mock/voices"
- with patch("os.listdir", return_value=["voice1.pt", "voice2.pt", "voice3.pt"]):
- mock_tts_model.setup.return_value = 3 # 3 voice files
- mock_tts_model.get_device.return_value = "cuda"
-
- # Create an async generator from the lifespan context manager
- async_gen = lifespan(MagicMock())
- # Start the context manager
- await async_gen.__aenter__()
-
- # Verify the expected logging sequence
- mock_logger.info.assert_any_call("Loading TTS model and voice packs...")
- # Check for the startup message containing the required info
- startup_calls = [call[0][0] for call in mock_logger.info.call_args_list]
- startup_msg = next(msg for msg in startup_calls if "Model loaded and warmed up on" in msg)
- assert "Model loaded and warmed up on cuda" in startup_msg
- assert "3 voice packs loaded successfully" in startup_msg
+ # Create async mock
+ async def async_setup():
+ return 3
+ mock_tts_model.setup = MagicMock()
+ mock_tts_model.setup.side_effect = async_setup
+ mock_tts_model.get_device.return_value = "cuda"
+
+ with patch("os.listdir", return_value=["voice1.pt", "voice2.pt", "voice3.pt"]):
+ # Create an async generator from the lifespan context manager
+ async_gen = lifespan(MagicMock())
+ # Start the context manager
+ await async_gen.__aenter__()
- # Verify model setup was called
- mock_tts_model.setup.assert_called_once()
+ # Verify the expected logging sequence
+ mock_logger.info.assert_any_call("Loading TTS model and voice packs...")
+
+ # Check for the startup message containing the required info
+ startup_calls = [call[0][0] for call in mock_logger.info.call_args_list]
+ startup_msg = next(msg for msg in startup_calls if "Model warmed up on" in msg)
+ assert "Model warmed up on" in startup_msg
+ assert "3 voice packs loaded" in startup_msg
- # Clean up
- await async_gen.__aexit__(None, None, None)
+ # Verify model setup was called
+ mock_tts_model.setup.assert_called_once()
+
+ # Clean up
+ await async_gen.__aexit__(None, None, None)
@pytest.mark.asyncio
@@ -81,39 +86,21 @@ async def test_lifespan_cuda_warmup(mock_tts_model):
"""Test model warmup specifically on CUDA"""
# Mock file system for voice counting
mock_tts_model.VOICES_DIR = "/mock/voices"
+
+ # Create async mock
+ async def async_setup():
+ return 2
+ mock_tts_model.setup = MagicMock()
+ mock_tts_model.setup.side_effect = async_setup
+ mock_tts_model.get_device.return_value = "cuda"
+
with patch("os.listdir", return_value=["voice1.pt", "voice2.pt"]):
- mock_tts_model.setup.return_value = 2 # 2 voice files
- mock_tts_model.get_device.return_value = "cuda"
+ # Create an async generator from the lifespan context manager
+ async_gen = lifespan(MagicMock())
+ await async_gen.__aenter__()
- # Create an async generator from the lifespan context manager
- async_gen = lifespan(MagicMock())
- await async_gen.__aenter__()
+ # Verify model setup was called
+ mock_tts_model.setup.assert_called_once()
- # Verify model setup was called
- mock_tts_model.setup.assert_called_once()
-
- # Clean up
- await async_gen.__aexit__(None, None, None)
-
-
-@pytest.mark.asyncio
-@patch("api.src.main.TTSModel")
-async def test_lifespan_cpu_fallback(mock_tts_model):
- """Test model warmup falling back to CPU"""
- # Mock file system for voice counting
- mock_tts_model.VOICES_DIR = "/mock/voices"
- with patch(
- "os.listdir", return_value=["voice1.pt", "voice2.pt", "voice3.pt", "voice4.pt"]
- ):
- mock_tts_model.setup.return_value = 4 # 4 voice files
- mock_tts_model.get_device.return_value = "cpu"
-
- # Create an async generator from the lifespan context manager
- async_gen = lifespan(MagicMock())
- await async_gen.__aenter__()
-
- # Verify model setup was called
- mock_tts_model.setup.assert_called_once()
-
- # Clean up
- await async_gen.__aexit__(None, None, None)
+ # Clean up
+ await async_gen.__aexit__(None, None, None)
diff --git a/api/tests/test_tts_implementations.py b/api/tests/test_tts_implementations.py
index 3f10c17..9e92392 100644
--- a/api/tests/test_tts_implementations.py
+++ b/api/tests/test_tts_implementations.py
@@ -16,13 +16,14 @@ def test_get_device_error():
with pytest.raises(RuntimeError, match="Model not initialized"):
TTSBaseModel.get_device()
+@pytest.mark.asyncio
@patch('torch.cuda.is_available')
@patch('os.path.exists')
@patch('os.path.join')
@patch('os.listdir')
@patch('torch.load')
@patch('torch.save')
-def test_setup_cuda_available(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available):
+async def test_setup_cuda_available(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available):
"""Test setup with CUDA available"""
TTSBaseModel._device = None
mock_cuda_available.return_value = True
@@ -36,17 +37,18 @@ def test_setup_cuda_available(mock_save, mock_load, mock_listdir, mock_join, moc
TTSBaseModel.process_text = MagicMock(return_value=("dummy", [1,2,3]))
TTSBaseModel.generate_from_tokens = MagicMock(return_value=np.zeros(1000))
- voice_count = TTSBaseModel.setup()
+ voice_count = await TTSBaseModel.setup()
assert TTSBaseModel._device == "cuda"
assert voice_count == 2
+@pytest.mark.asyncio
@patch('torch.cuda.is_available')
@patch('os.path.exists')
@patch('os.path.join')
@patch('os.listdir')
@patch('torch.load')
@patch('torch.save')
-def test_setup_cuda_unavailable(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available):
+async def test_setup_cuda_unavailable(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available):
"""Test setup with CUDA unavailable"""
TTSBaseModel._device = None
mock_cuda_available.return_value = False
@@ -60,7 +62,7 @@ def test_setup_cuda_unavailable(mock_save, mock_load, mock_listdir, mock_join, m
TTSBaseModel.process_text = MagicMock(return_value=("dummy", [1,2,3]))
TTSBaseModel.generate_from_tokens = MagicMock(return_value=np.zeros(1000))
- voice_count = TTSBaseModel.setup()
+ voice_count = await TTSBaseModel.setup()
assert TTSBaseModel._device == "cpu"
assert voice_count == 2
diff --git a/api/tests/test_tts_service.py b/api/tests/test_tts_service.py
index 4e63ff1..4fa2c52 100644
--- a/api/tests/test_tts_service.py
+++ b/api/tests/test_tts_service.py
@@ -31,27 +31,6 @@ def sample_audio():
return np.sin(2 * np.pi * frequency * t).astype(np.float32)
-def test_split_text(tts_service):
- """Test text splitting into sentences"""
- text = "First sentence. Second sentence! Third sentence?"
- sentences = tts_service._split_text(text)
- assert len(sentences) == 3
- assert sentences[0] == "First sentence."
- assert sentences[1] == "Second sentence!"
- assert sentences[2] == "Third sentence?"
-
-
-def test_split_text_empty(tts_service):
- """Test splitting empty text"""
- assert tts_service._split_text("") == []
-
-
-def test_split_text_single_sentence(tts_service):
- """Test splitting single sentence"""
- text = "Just one sentence."
- assert tts_service._split_text(text) == ["Just one sentence."]
-
-
def test_audio_to_bytes(tts_service, sample_audio):
"""Test converting audio tensor to bytes"""
audio_bytes = tts_service._audio_to_bytes(sample_audio)
@@ -152,7 +131,7 @@ def test_generate_audio_phonemize_error(
mock_torch_load.return_value = torch.zeros((10, 24000))
mock_generate.return_value = (None, None)
- with pytest.raises(ValueError, match="No audio chunks were generated successfully"):
+ with pytest.raises(ValueError, match="No chunks were processed successfully"):
tts_service._generate_audio("Test text", "af", 1.0)
@@ -185,7 +164,7 @@ def test_generate_audio_error(
mock_exists.return_value = True
mock_torch_load.return_value = torch.zeros((10, 24000))
- with pytest.raises(ValueError, match="No audio chunks were generated successfully"):
+ with pytest.raises(ValueError, match="No chunks were processed successfully"):
tts_service._generate_audio("Test text", "af", 1.0)
diff --git a/assets/format_comparison.png b/assets/format_comparison.png
new file mode 100644
index 0000000..f1669bc
Binary files /dev/null and b/assets/format_comparison.png differ
diff --git a/assets/gpu_first_token_latency_direct.png b/assets/gpu_first_token_latency_direct.png
new file mode 100644
index 0000000..769bbbc
Binary files /dev/null and b/assets/gpu_first_token_latency_direct.png differ
diff --git a/assets/gpu_first_token_latency_openai.png b/assets/gpu_first_token_latency_openai.png
new file mode 100644
index 0000000..06f1b13
Binary files /dev/null and b/assets/gpu_first_token_latency_openai.png differ
diff --git a/assets/gpu_first_token_timeline_direct.png b/assets/gpu_first_token_timeline_direct.png
new file mode 100644
index 0000000..37ae620
Binary files /dev/null and b/assets/gpu_first_token_timeline_direct.png differ
diff --git a/assets/gpu_first_token_timeline_openai.png b/assets/gpu_first_token_timeline_openai.png
new file mode 100644
index 0000000..fa566cc
Binary files /dev/null and b/assets/gpu_first_token_timeline_openai.png differ
diff --git a/assets/gpu_processing_time.png b/assets/gpu_processing_time.png
new file mode 100644
index 0000000..334e37a
Binary files /dev/null and b/assets/gpu_processing_time.png differ
diff --git a/assets/gpu_realtime_factor.png b/assets/gpu_realtime_factor.png
new file mode 100644
index 0000000..1e55996
Binary files /dev/null and b/assets/gpu_realtime_factor.png differ
diff --git a/assets/gpu_total_time_latency_direct.png b/assets/gpu_total_time_latency_direct.png
new file mode 100644
index 0000000..a76c467
Binary files /dev/null and b/assets/gpu_total_time_latency_direct.png differ
diff --git a/assets/gpu_total_time_latency_openai.png b/assets/gpu_total_time_latency_openai.png
new file mode 100644
index 0000000..0acec6a
Binary files /dev/null and b/assets/gpu_total_time_latency_openai.png differ
diff --git a/assets/voice_analysis.png b/assets/voice_analysis.png
new file mode 100644
index 0000000..401c718
Binary files /dev/null and b/assets/voice_analysis.png differ
diff --git a/docker-compose.cpu.yml b/docker-compose.cpu.yml
index f44f2d4..8a4e3b1 100644
--- a/docker-compose.cpu.yml
+++ b/docker-compose.cpu.yml
@@ -43,6 +43,7 @@ services:
- ONNX_OPTIMIZATION_LEVEL=all
- ONNX_MEMORY_PATTERN=true
- ONNX_ARENA_EXTEND_STRATEGY=kNextPowerOfTwo
+
depends_on:
model-fetcher:
condition: service_healthy
diff --git a/docker-compose.yml b/docker-compose.yml
index 16f4b4b..5b6b31b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,7 @@ services:
model-fetcher:
image: datamachines/git-lfs:latest
environment:
- - SKIP_MODEL_FETCH=${SKIP_MODEL_FETCH:-true}
+ - SKIP_MODEL_FETCH=${SKIP_MODEL_FETCH:-false}
volumes:
- ./Kokoro-82M:/app/Kokoro-82M
working_dir: /app/Kokoro-82M
@@ -32,10 +32,10 @@ services:
start_period: 1s
kokoro-tts:
- image: ghcr.io/remsky/kokoro-fastapi:latest
+ # image: ghcr.io/remsky/kokoro-fastapi:latest
# Uncomment below to build from source instead of using the released image
- # build:
- # context: .
+ build:
+ context: .
volumes:
- ./api/src:/app/api/src
- ./Kokoro-82M:/app/Kokoro-82M
@@ -54,14 +54,14 @@ services:
model-fetcher:
condition: service_healthy
- # # Gradio UI service [Comment out everything below if you don't need it]
- # gradio-ui:
- # build:
- # context: ./ui
- # ports:
- # - "7860:7860"
- # volumes:
- # - ./ui/data:/app/ui/data
- # - ./ui/app.py:/app/app.py # Mount app.py for hot reload
- # environment:
- # - GRADIO_WATCH=True # Enable hot reloading
+ # Gradio UI service [Comment out everything below if you don't need it]
+ gradio-ui:
+ build:
+ context: ./ui
+ ports:
+ - "7860:7860"
+ volumes:
+ - ./ui/data:/app/ui/data
+ - ./ui/app.py:/app/app.py # Mount app.py for hot reload
+ environment:
+ - GRADIO_WATCH=True # Enable hot reloading
diff --git a/examples/assorted_checks/benchmarks/benchmark_first_token.py b/examples/assorted_checks/benchmarks/benchmark_first_token.py
index 6709876..a9e47bb 100644
--- a/examples/assorted_checks/benchmarks/benchmark_first_token.py
+++ b/examples/assorted_checks/benchmarks/benchmark_first_token.py
@@ -1,15 +1,19 @@
#!/usr/bin/env python3
import os
-import time
import json
-import numpy as np
-import requests
-import pandas as pd
-from lib.shared_benchmark_utils import get_text_for_tokens, enc
-from lib.shared_utils import save_json_results
-from lib.shared_plotting import plot_correlation, plot_timeline
+import time
-def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int) -> dict:
+import numpy as np
+import pandas as pd
+import requests
+from lib.shared_utils import save_json_results
+from lib.shared_plotting import plot_timeline, plot_correlation
+from lib.shared_benchmark_utils import enc, get_text_for_tokens
+
+
+def measure_first_token(
+ text: str, output_dir: str, tokens: int, run_number: int
+) -> dict:
"""Measure time to audio via API calls and save the audio output"""
results = {
"text_length": len(text),
@@ -18,12 +22,12 @@ def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int
"time_to_first_chunk": None,
"error": None,
"audio_path": None,
- "audio_length": None # Length of output audio in seconds
+ "audio_length": None, # Length of output audio in seconds
}
-
+
try:
start_time = time.time()
-
+
# Make request without streaming
response = requests.post(
"http://localhost:8880/v1/audio/speech",
@@ -32,58 +36,62 @@ def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int
"input": text,
"voice": "af",
"response_format": "wav",
- "stream": False
+ "stream": False,
},
- timeout=1800
+ timeout=1800,
)
response.raise_for_status()
-
+
# Save complete audio
audio_filename = f"benchmark_tokens{tokens}_run{run_number}.wav"
audio_path = os.path.join(output_dir, audio_filename)
results["audio_path"] = audio_path
-
+
content = response.content
- with open(audio_path, 'wb') as f:
+ with open(audio_path, "wb") as f:
f.write(content)
-
+
# Calculate audio length using scipy
import scipy.io.wavfile as wavfile
+
sample_rate, audio_data = wavfile.read(audio_path)
results["audio_length"] = len(audio_data) / sample_rate # Length in seconds
results["time_to_first_chunk"] = time.time() - start_time
-
+
results["total_time"] = time.time() - start_time
return results
-
+
except Exception as e:
results["error"] = str(e)
return results
+
def main():
# Set up paths
script_dir = os.path.dirname(os.path.abspath(__file__))
output_dir = os.path.join(script_dir, "output_audio")
output_data_dir = os.path.join(script_dir, "output_data")
-
+
# Create output directories
os.makedirs(output_dir, exist_ok=True)
os.makedirs(output_data_dir, exist_ok=True)
# Load sample text
- with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f:
+ with open(
+ os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8"
+ ) as f:
text = f.read()
# Test specific token counts
token_sizes = [10, 25, 50, 100, 200, 500]
all_results = []
-
+
for tokens in token_sizes:
print(f"\nTesting {tokens} tokens")
test_text = get_text_for_tokens(text, tokens)
actual_tokens = len(enc.encode(test_text))
print(f"Text preview: {test_text[:50]}...")
-
+
# Run test 3 times for each size to get average
for i in range(5):
print(f"Run {i+1}/3...")
@@ -91,67 +99,74 @@ def main():
result["target_tokens"] = tokens
result["actual_tokens"] = actual_tokens
result["run_number"] = i + 1
-
+
print(f"Time to Audio: {result.get('time_to_first_chunk', 'N/A'):.3f}s")
print(f"Total time: {result.get('total_time', 'N/A'):.3f}s")
-
+
if result["error"]:
print(f"Error: {result['error']}")
-
+
all_results.append(result)
-
+
# Calculate averages per token size
summary = {}
for tokens in token_sizes:
- matching_results = [r for r in all_results if r["target_tokens"] == tokens and not r["error"]]
+ matching_results = [
+ r for r in all_results if r["target_tokens"] == tokens and not r["error"]
+ ]
if matching_results:
- avg_first_chunk = sum(r["time_to_first_chunk"] for r in matching_results) / len(matching_results)
- avg_total = sum(r["total_time"] for r in matching_results) / len(matching_results)
- avg_audio_length = sum(r["audio_length"] for r in matching_results) / len(matching_results)
+ avg_first_chunk = sum(
+ r["time_to_first_chunk"] for r in matching_results
+ ) / len(matching_results)
+ avg_total = sum(r["total_time"] for r in matching_results) / len(
+ matching_results
+ )
+ avg_audio_length = sum(r["audio_length"] for r in matching_results) / len(
+ matching_results
+ )
summary[tokens] = {
"avg_time_to_first_chunk": round(avg_first_chunk, 3),
"avg_total_time": round(avg_total, 3),
"avg_audio_length": round(avg_audio_length, 3),
- "num_successful_runs": len(matching_results)
+ "num_successful_runs": len(matching_results),
}
-
+
# Save results
# Save results
results_data = {
"individual_runs": all_results,
"summary": summary,
- "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
+ "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
}
save_json_results(
- results_data,
- os.path.join(output_data_dir, "first_token_benchmark.json")
+ results_data, os.path.join(output_data_dir, "first_token_benchmark.json")
)
-
+
# Create plot directory if it doesn't exist
output_plots_dir = os.path.join(script_dir, "output_plots")
os.makedirs(output_plots_dir, exist_ok=True)
-
+
# Create DataFrame for plotting
df = pd.DataFrame(all_results)
-
+
# Create both plots
plot_correlation(
- df, "target_tokens", "time_to_first_chunk",
+ df,
+ "target_tokens",
+ "time_to_first_chunk",
"Time to Audio vs Input Size",
"Number of Input Tokens",
"Time to Audio (seconds)",
- os.path.join(output_plots_dir, "first_token_latency.png")
+ os.path.join(output_plots_dir, "first_token_latency.png"),
)
-
- plot_timeline(
- df,
- os.path.join(output_plots_dir, "first_token_timeline.png")
- )
-
+
+ plot_timeline(df, os.path.join(output_plots_dir, "first_token_timeline.png"))
+
print("\nResults and plots saved to:")
print(f"- {os.path.join(output_data_dir, 'first_token_benchmark.json')}")
print(f"- {os.path.join(output_plots_dir, 'first_token_latency.png')}")
print(f"- {os.path.join(output_plots_dir, 'first_token_timeline.png')}")
+
if __name__ == "__main__":
main()
diff --git a/examples/assorted_checks/benchmarks/benchmark_first_token_stream.py b/examples/assorted_checks/benchmarks/benchmark_first_token_stream.py
deleted file mode 100644
index 9e4deba..0000000
--- a/examples/assorted_checks/benchmarks/benchmark_first_token_stream.py
+++ /dev/null
@@ -1,193 +0,0 @@
-#!/usr/bin/env python3
-import os
-import time
-import json
-import numpy as np
-import requests
-import pandas as pd
-from lib.shared_benchmark_utils import get_text_for_tokens, enc
-from lib.shared_utils import save_json_results
-from lib.shared_plotting import plot_correlation, plot_timeline
-
-def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int) -> dict:
- """Measure time to audio via API calls and save the audio output"""
- results = {
- "text_length": len(text),
- "token_count": len(enc.encode(text)),
- "total_time": None,
- "time_to_first_chunk": None,
- "error": None,
- "audio_path": None,
- "audio_length": None # Length of output audio in seconds
- }
-
- try:
- start_time = time.time()
-
- # Make request with streaming enabled
- response = requests.post(
- "http://localhost:8880/v1/audio/speech",
- json={
- "model": "kokoro",
- "input": text,
- "voice": "af",
- "response_format": "pcm",
- "stream": True
- },
- stream=True,
- timeout=1800
- )
- response.raise_for_status()
-
- # Save complete audio
- audio_filename = f"benchmark_tokens{tokens}_run{run_number}_stream.wav"
- audio_path = os.path.join(output_dir, audio_filename)
- results["audio_path"] = audio_path
-
- first_chunk_time = None
- chunks = []
- for chunk in response.iter_content(chunk_size=1024):
- if chunk:
- if first_chunk_time is None:
- first_chunk_time = time.time()
- results["time_to_first_chunk"] = first_chunk_time - start_time
- chunks.append(chunk)
-
- # Concatenate all PCM chunks
- if not chunks:
- raise ValueError("No audio chunks received")
-
- all_audio_data = b''.join(chunks)
-
- # Write as WAV file
- import wave
- with wave.open(audio_path, 'wb') as wav_file:
- wav_file.setnchannels(1) # Mono
- wav_file.setsampwidth(2) # 2 bytes per sample (16-bit)
- wav_file.setframerate(24000) # Known sample rate for Kokoro
- wav_file.writeframes(all_audio_data)
-
- # Calculate audio length using scipy
- import scipy.io.wavfile as wavfile
- sample_rate, audio_data = wavfile.read(audio_path)
- results["audio_length"] = len(audio_data) / sample_rate # Length in seconds
-
- results["total_time"] = time.time() - start_time
-
- # Print debug info
- print(f"Complete audio size: {len(all_audio_data)} bytes")
- print(f"Number of chunks received: {len(chunks)}")
- print(f"Audio length: {results['audio_length']:.3f}s")
-
- return results
-
- except Exception as e:
- results["error"] = str(e)
- return results
-
-def main():
- # Set up paths with _stream suffix
- script_dir = os.path.dirname(os.path.abspath(__file__))
- output_dir = os.path.join(script_dir, "output_audio_stream")
- output_data_dir = os.path.join(script_dir, "output_data")
-
- # Create output directories
- os.makedirs(output_dir, exist_ok=True)
- os.makedirs(output_data_dir, exist_ok=True)
-
- # Load sample text
- with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f:
- text = f.read()
-
- # Test specific token counts
- token_sizes = [50, 100, 200, 500, 1000, 2000, 5000, 10000]
- all_results = []
-
- for tokens in token_sizes:
- print(f"\nTesting {tokens} tokens (streaming)")
- test_text = get_text_for_tokens(text, tokens)
- actual_tokens = len(enc.encode(test_text))
- print(f"Text preview: {test_text[:50]}...")
-
- # Run test 3 times for each size to get average
- for i in range(5):
- print(f"Run {i+1}/3...")
- result = measure_first_token(test_text, output_dir, tokens, i + 1)
- result["target_tokens"] = tokens
- result["actual_tokens"] = actual_tokens
- result["run_number"] = i + 1
-
- print(f"Time to First Audio: {result.get('time_to_first_chunk', 'N/A'):.3f}s")
- print(f"Time to Save Complete: {result.get('total_time', 'N/A'):.3f}s")
- print(f"Audio length: {result.get('audio_length', 'N/A'):.3f}s")
- print(f"Streaming overhead: {(result.get('total_time', 0) - result.get('time_to_first_chunk', 0)):.3f}s")
-
- if result["error"]:
- print(f"Error: {result['error']}")
-
- all_results.append(result)
-
- # Calculate averages per token size
- summary = {}
- for tokens in token_sizes:
- matching_results = [r for r in all_results if r["target_tokens"] == tokens and not r["error"]]
- if matching_results:
- avg_first_chunk = sum(r["time_to_first_chunk"] for r in matching_results) / len(matching_results)
- avg_total = sum(r["total_time"] for r in matching_results) / len(matching_results)
- avg_audio_length = sum(r["audio_length"] for r in matching_results) / len(matching_results)
- summary[tokens] = {
- "avg_time_to_first_chunk": round(avg_first_chunk, 3),
- "avg_total_time": round(avg_total, 3),
- "avg_audio_length": round(avg_audio_length, 3),
- "num_successful_runs": len(matching_results)
- }
-
- # Save results with _stream suffix
- results_data = {
- "individual_runs": all_results,
- "summary": summary,
- "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
- }
- save_json_results(
- results_data,
- os.path.join(output_data_dir, "first_token_benchmark_stream.json")
- )
-
- # Create plot directory if it doesn't exist
- output_plots_dir = os.path.join(script_dir, "output_plots")
- os.makedirs(output_plots_dir, exist_ok=True)
-
- # Create DataFrame for plotting
- df = pd.DataFrame(all_results)
-
- # Create both plots with _stream suffix
- # Plot correlation for both metrics
- plot_correlation(
- df, "target_tokens", "time_to_first_chunk",
- "Time to First Audio vs Input Size (Streaming)",
- "Number of Input Tokens",
- "Time to First Audio (seconds)",
- os.path.join(output_plots_dir, "first_token_latency_stream.png")
- )
-
- plot_correlation(
- df, "target_tokens", "total_time",
- "Total Time vs Input Size (Streaming)",
- "Number of Input Tokens",
- "Total Time (seconds)",
- os.path.join(output_plots_dir, "total_time_latency_stream.png")
- )
-
- plot_timeline(
- df,
- os.path.join(output_plots_dir, "first_token_timeline_stream.png", suffix="(Streaming)")
- )
-
- print("\nResults and plots saved to:")
- print(f"- {os.path.join(output_data_dir, 'first_token_benchmark_stream.json')}")
- print(f"- {os.path.join(output_plots_dir, 'first_token_latency_stream.png')}")
- print(f"- {os.path.join(output_plots_dir, 'total_time_latency_stream.png')}")
- print(f"- {os.path.join(output_plots_dir, 'first_token_timeline_stream.png')}")
-
-if __name__ == "__main__":
- main()
diff --git a/examples/assorted_checks/benchmarks/benchmark_first_token_stream_openai.py b/examples/assorted_checks/benchmarks/benchmark_first_token_stream_openai.py
deleted file mode 100644
index 3adc866..0000000
--- a/examples/assorted_checks/benchmarks/benchmark_first_token_stream_openai.py
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/usr/bin/env python3
-import os
-import time
-import json
-import numpy as np
-import pandas as pd
-from openai import OpenAI
-from lib.shared_benchmark_utils import get_text_for_tokens, enc
-from lib.shared_utils import save_json_results
-from lib.shared_plotting import plot_correlation, plot_timeline
-
-def measure_first_token(text: str, output_dir: str, tokens: int, run_number: int) -> dict:
- """Measure time to audio via OpenAI API calls and save the audio output"""
- results = {
- "text_length": len(text),
- "token_count": len(enc.encode(text)),
- "total_time": None,
- "time_to_first_chunk": None,
- "error": None,
- "audio_path": None,
- "audio_length": None # Length of output audio in seconds
- }
-
- try:
- start_time = time.time()
-
- # Initialize OpenAI client
- openai = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed-for-local")
-
- # Save complete audio
- audio_filename = f"benchmark_tokens{tokens}_run{run_number}_stream_openai.wav"
- audio_path = os.path.join(output_dir, audio_filename)
- results["audio_path"] = audio_path
-
- first_chunk_time = None
- all_audio_data = bytearray()
- chunk_count = 0
-
- # Make streaming request using OpenAI client
- with openai.audio.speech.with_streaming_response.create(
- model="kokoro",
- voice="af",
- response_format="pcm",
- input=text,
- ) as response:
- for chunk in response.iter_bytes(chunk_size=1024):
- if chunk:
- chunk_count += 1
- if first_chunk_time is None:
- first_chunk_time = time.time()
- results["time_to_first_chunk"] = first_chunk_time - start_time
- all_audio_data.extend(chunk)
-
- # Write as WAV file
- import wave
- with wave.open(audio_path, 'wb') as wav_file:
- wav_file.setnchannels(1) # Mono
- wav_file.setsampwidth(2) # 2 bytes per sample (16-bit)
- wav_file.setframerate(24000) # Known sample rate for Kokoro
- wav_file.writeframes(all_audio_data)
-
- # Calculate audio length using scipy
- import scipy.io.wavfile as wavfile
- sample_rate, audio_data = wavfile.read(audio_path)
- results["audio_length"] = len(audio_data) / sample_rate # Length in seconds
-
- results["total_time"] = time.time() - start_time
-
- # Print debug info
- print(f"Complete audio size: {len(all_audio_data)} bytes")
- print(f"Number of chunks received: {chunk_count}")
- print(f"Audio length: {results['audio_length']:.3f}s")
-
- return results
-
- except Exception as e:
- results["error"] = str(e)
- return results
-
-def main():
- # Set up paths with _stream_openai suffix
- script_dir = os.path.dirname(os.path.abspath(__file__))
- output_dir = os.path.join(script_dir, "output_audio_stream_openai")
- output_data_dir = os.path.join(script_dir, "output_data")
-
- # Create output directories
- os.makedirs(output_dir, exist_ok=True)
- os.makedirs(output_data_dir, exist_ok=True)
-
- # Load sample text
- with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f:
- text = f.read()
-
- # Test specific token counts
- token_sizes = [50, 100, 200, 500]
- all_results = []
-
- for tokens in token_sizes:
- print(f"\nTesting {tokens} tokens (streaming)")
- test_text = get_text_for_tokens(text, tokens)
- actual_tokens = len(enc.encode(test_text))
- print(f"Text preview: {test_text[:50]}...")
-
- # Run test 5 times for each size to get average
- for i in range(5):
- print(f"Run {i+1}/5...")
- result = measure_first_token(test_text, output_dir, tokens, i + 1)
- result["target_tokens"] = tokens
- result["actual_tokens"] = actual_tokens
- result["run_number"] = i + 1
-
- print(f"Time to First Audio: {result.get('time_to_first_chunk', 'N/A'):.3f}s")
- print(f"Time to Save Complete: {result.get('total_time', 'N/A'):.3f}s")
- print(f"Audio length: {result.get('audio_length', 'N/A'):.3f}s")
- print(f"Streaming overhead: {(result.get('total_time', 0) - result.get('time_to_first_chunk', 0)):.3f}s")
-
- if result["error"]:
- print(f"Error: {result['error']}")
-
- all_results.append(result)
-
- # Calculate averages per token size
- summary = {}
- for tokens in token_sizes:
- matching_results = [r for r in all_results if r["target_tokens"] == tokens and not r["error"]]
- if matching_results:
- avg_first_chunk = sum(r["time_to_first_chunk"] for r in matching_results) / len(matching_results)
- avg_total = sum(r["total_time"] for r in matching_results) / len(matching_results)
- avg_audio_length = sum(r["audio_length"] for r in matching_results) / len(matching_results)
- summary[tokens] = {
- "avg_time_to_first_chunk": round(avg_first_chunk, 3),
- "avg_total_time": round(avg_total, 3),
- "avg_audio_length": round(avg_audio_length, 3),
- "num_successful_runs": len(matching_results)
- }
-
- # Save results with _stream_openai suffix
- results_data = {
- "individual_runs": all_results,
- "summary": summary,
- "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
- }
- save_json_results(
- results_data,
- os.path.join(output_data_dir, "first_token_benchmark_stream_openai.json")
- )
-
- # Create plot directory if it doesn't exist
- output_plots_dir = os.path.join(script_dir, "output_plots")
- os.makedirs(output_plots_dir, exist_ok=True)
-
- # Create DataFrame for plotting
- df = pd.DataFrame(all_results)
-
- # Create plots with _stream_openai suffix
- plot_correlation(
- df, "target_tokens", "time_to_first_chunk",
- "Time to First Audio vs Input Size (OpenAI Streaming)",
- "Number of Input Tokens",
- "Time to First Audio (seconds)",
- os.path.join(output_plots_dir, "first_token_latency_stream_openai.png")
- )
-
- plot_correlation(
- df, "target_tokens", "total_time",
- "Total Time vs Input Size (OpenAI Streaming)",
- "Number of Input Tokens",
- "Total Time (seconds)",
- os.path.join(output_plots_dir, "total_time_latency_stream_openai.png")
- )
-
- plot_timeline(
- df,
- os.path.join(output_plots_dir, "first_token_timeline_stream_openai.png")
- )
-
- print("\nResults and plots saved to:")
- print(f"- {os.path.join(output_data_dir, 'first_token_benchmark_stream_openai.json')}")
- print(f"- {os.path.join(output_plots_dir, 'first_token_latency_stream_openai.png')}")
- print(f"- {os.path.join(output_plots_dir, 'total_time_latency_stream_openai.png')}")
- print(f"- {os.path.join(output_plots_dir, 'first_token_timeline_stream_openai.png')}")
-
-if __name__ == "__main__":
- main()
diff --git a/examples/assorted_checks/benchmarks/benchmark_first_token_stream_unified.py b/examples/assorted_checks/benchmarks/benchmark_first_token_stream_unified.py
new file mode 100644
index 0000000..0b673ae
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/benchmark_first_token_stream_unified.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+import os
+import time
+
+import requests
+from openai import OpenAI
+from lib.stream_utils import run_benchmark
+
+OPENAI_CLIENT = OpenAI(
+ base_url="http://localhost:8880/v1", api_key="not-needed-for-local"
+)
+
+
+def measure_first_token_requests(
+ text: str, output_dir: str, tokens: int, run_number: int
+) -> dict:
+ """Measure time to audio via direct API calls and save the audio output"""
+ results = {
+ "text_length": len(text),
+ "token_count": None, # Will be set by run_benchmark
+ "total_time": None,
+ "time_to_first_chunk": None,
+ "error": None,
+ "audio_path": None,
+ "audio_length": None,
+ }
+
+ try:
+ start_time = time.time()
+
+ # Make request with streaming enabled
+ response = requests.post(
+ "http://localhost:8880/v1/audio/speech",
+ json={
+ "model": "kokoro",
+ "input": text,
+ "voice": "af",
+ "response_format": "pcm",
+ "stream": True,
+ },
+ stream=True,
+ timeout=1800,
+ )
+ response.raise_for_status()
+
+ # Save complete audio
+ audio_filename = f"benchmark_tokens{tokens}_run{run_number}_stream.wav"
+ audio_path = os.path.join(output_dir, audio_filename)
+ results["audio_path"] = audio_path
+
+ first_chunk_time = None
+ chunks = []
+ for chunk in response.iter_content(chunk_size=1024):
+ if chunk:
+ if first_chunk_time is None:
+ first_chunk_time = time.time()
+ results["time_to_first_chunk"] = first_chunk_time - start_time
+ chunks.append(chunk)
+
+ # Concatenate all PCM chunks
+ if not chunks:
+ raise ValueError("No audio chunks received")
+
+ all_audio_data = b"".join(chunks)
+
+ # Write as WAV file
+ import wave
+
+ with wave.open(audio_path, "wb") as wav_file:
+ wav_file.setnchannels(1) # Mono
+ wav_file.setsampwidth(2) # 2 bytes per sample (16-bit)
+ wav_file.setframerate(24000) # Known sample rate for Kokoro
+ wav_file.writeframes(all_audio_data)
+
+ # Calculate audio length using scipy
+ import scipy.io.wavfile as wavfile
+
+ sample_rate, audio_data = wavfile.read(audio_path)
+ results["audio_length"] = len(audio_data) / sample_rate # Length in seconds
+
+ results["total_time"] = time.time() - start_time
+
+ # Print debug info
+ print(f"Complete audio size: {len(all_audio_data)} bytes")
+ print(f"Number of chunks received: {len(chunks)}")
+ print(f"Audio length: {results['audio_length']:.3f}s")
+
+ return results
+
+ except Exception as e:
+ results["error"] = str(e)
+ return results
+
+
+def measure_first_token_openai(
+ text: str, output_dir: str, tokens: int, run_number: int
+) -> dict:
+ """Measure time to audio via OpenAI API calls and save the audio output"""
+ results = {
+ "text_length": len(text),
+ "token_count": None, # Will be set by run_benchmark
+ "total_time": None,
+ "time_to_first_chunk": None,
+ "error": None,
+ "audio_path": None,
+ "audio_length": None,
+ }
+
+ try:
+ start_time = time.time()
+
+ # Initialize OpenAI client
+
+ # Save complete audio
+ audio_filename = f"benchmark_tokens{tokens}_run{run_number}_stream_openai.wav"
+ audio_path = os.path.join(output_dir, audio_filename)
+ results["audio_path"] = audio_path
+
+ first_chunk_time = None
+ all_audio_data = bytearray()
+ chunk_count = 0
+
+ # Make streaming request using OpenAI client
+ with OPENAI_CLIENT.audio.speech.with_streaming_response.create(
+ model="kokoro",
+ voice="af",
+ response_format="pcm",
+ input=text,
+ ) as response:
+ for chunk in response.iter_bytes(chunk_size=1024):
+ if chunk:
+ chunk_count += 1
+ if first_chunk_time is None:
+ first_chunk_time = time.time()
+ results["time_to_first_chunk"] = first_chunk_time - start_time
+ all_audio_data.extend(chunk)
+
+ # Write as WAV file
+ import wave
+
+ with wave.open(audio_path, "wb") as wav_file:
+ wav_file.setnchannels(1) # Mono
+ wav_file.setsampwidth(2) # 2 bytes per sample (16-bit)
+ wav_file.setframerate(24000) # Known sample rate for Kokoro
+ wav_file.writeframes(all_audio_data)
+
+ # Calculate audio length using scipy
+ import scipy.io.wavfile as wavfile
+
+ sample_rate, audio_data = wavfile.read(audio_path)
+ results["audio_length"] = len(audio_data) / sample_rate # Length in seconds
+
+ results["total_time"] = time.time() - start_time
+
+ # Print debug info
+ print(f"Complete audio size: {len(all_audio_data)} bytes")
+ print(f"Number of chunks received: {chunk_count}")
+ print(f"Audio length: {results['audio_length']:.3f}s")
+
+ return results
+
+ except Exception as e:
+ results["error"] = str(e)
+ return results
+
+
+def main():
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ prefix='cpu'
+ # Run requests benchmark
+ print("\n=== Running Direct Requests Benchmark ===")
+ run_benchmark(
+ measure_first_token_requests,
+ output_dir=os.path.join(script_dir, "output_audio_stream"),
+ output_data_dir=os.path.join(script_dir, "output_data"),
+ output_plots_dir=os.path.join(script_dir, "output_plots"),
+ suffix="_stream",
+ plot_title_suffix="(Streaming)",
+ prefix=prefix
+ )
+ # Run OpenAI benchmark
+ print("\n=== Running OpenAI Library Benchmark ===")
+ run_benchmark(
+ measure_first_token_openai,
+ output_dir=os.path.join(script_dir, "output_audio_stream_openai"),
+ output_data_dir=os.path.join(script_dir, "output_data"),
+ output_plots_dir=os.path.join(script_dir, "output_plots"),
+ suffix="_stream_openai",
+ plot_title_suffix="(OpenAI Streaming)",
+ prefix=prefix
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py b/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py
index 385abb0..7e4940d 100644
--- a/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py
+++ b/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py
@@ -1,30 +1,37 @@
#!/usr/bin/env python3
import os
+import sys
import json
import time
-import threading
import queue
-import pandas as pd
-import sys
+import threading
from datetime import datetime
-from lib.shared_plotting import plot_system_metrics, plot_correlation
+import pandas as pd
from lib.shared_utils import (
- get_system_metrics, save_json_results, write_benchmark_stats,
- real_time_factor
+ real_time_factor,
+ save_json_results,
+ get_system_metrics,
+ write_benchmark_stats,
)
+from lib.shared_plotting import plot_correlation, plot_system_metrics
from lib.shared_benchmark_utils import (
- get_text_for_tokens, make_tts_request, generate_token_sizes, enc
+ enc,
+ make_tts_request,
+ get_text_for_tokens,
+ generate_token_sizes,
)
+
class SystemMonitor:
def __init__(self, interval=1.0):
+ """Rough system tracker: Not always accurate"""
self.interval = interval
self.metrics_queue = queue.Queue()
self.stop_event = threading.Event()
self.metrics_timeline = []
self.start_time = None
-
+
def _monitor_loop(self):
"""Background thread function to collect system metrics."""
while not self.stop_event.is_set():
@@ -32,20 +39,20 @@ class SystemMonitor:
metrics["relative_time"] = time.time() - self.start_time
self.metrics_queue.put(metrics)
time.sleep(self.interval)
-
+
def start(self):
"""Start the monitoring thread."""
self.start_time = time.time()
self.monitor_thread = threading.Thread(target=self._monitor_loop)
self.monitor_thread.daemon = True
self.monitor_thread.start()
-
+
def stop(self):
"""Stop the monitoring thread and collect final metrics."""
self.stop_event.set()
- if hasattr(self, 'monitor_thread'):
+ if hasattr(self, "monitor_thread"):
self.monitor_thread.join(timeout=2)
-
+
# Collect all metrics from queue
while True:
try:
@@ -53,23 +60,24 @@ class SystemMonitor:
self.metrics_timeline.append(metrics)
except queue.Empty:
break
-
+
return self.metrics_timeline
+
def main():
# Initialize system monitor
monitor = SystemMonitor(interval=1.0) # 1 second interval
# Set prefix for output files (e.g. "gpu", "cpu", "onnx", etc.)
- prefix = "gpu"
+ prefix = "cpu"
# Generate token sizes
- if 'gpu' in prefix:
+ if "gpu" in prefix:
token_sizes = generate_token_sizes(
- max_tokens=5000, dense_step=150,
- dense_max=1000, sparse_step=1000)
- elif 'cpu' in prefix:
+ max_tokens=1000, dense_step=150, dense_max=1000, sparse_step=1000
+ )
+ elif "cpu" in prefix:
token_sizes = generate_token_sizes(
- max_tokens=1000, dense_step=300,
- dense_max=1000, sparse_step=0)
+ max_tokens=1000, dense_step=100, dense_max=500, sparse_step=250
+ )
else:
token_sizes = generate_token_sizes(max_tokens=3000)
@@ -78,7 +86,7 @@ def main():
output_dir = os.path.join(script_dir, "output_audio")
output_data_dir = os.path.join(script_dir, "output_data")
output_plots_dir = os.path.join(script_dir, "output_plots")
-
+
# Create output directories
os.makedirs(output_dir, exist_ok=True)
os.makedirs(output_data_dir, exist_ok=True)
@@ -90,7 +98,9 @@ def main():
filename = f"{prefix}_{filename}"
return os.path.join(path, filename)
- with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f:
+ with open(
+ os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8"
+ ) as f:
text = f.read()
total_tokens = len(enc.encode(text))
@@ -100,7 +110,7 @@ def main():
results = []
test_start_time = time.time()
-
+
# Start system monitoring
monitor.start()
@@ -114,7 +124,8 @@ def main():
processing_time, audio_length = make_tts_request(
chunk,
output_dir=output_dir,
- prefix=prefix
+ prefix=prefix,
+ stream=False, # Use non-streaming mode for RTF benchmarking
)
if processing_time is None or audio_length is None:
print("Breaking loop due to error")
@@ -123,14 +134,16 @@ def main():
# Calculate RTF using the correct formula
rtf = real_time_factor(processing_time, audio_length)
print(f"Real-Time Factor: {rtf:.5f}")
-
- results.append({
- "tokens": actual_tokens,
- "processing_time": processing_time,
- "output_length": audio_length,
- "rtf": rtf,
- "elapsed_time": round(time.time() - test_start_time, 2),
- })
+
+ results.append(
+ {
+ "tokens": actual_tokens,
+ "processing_time": processing_time,
+ "output_length": audio_length,
+ "rtf": rtf,
+ "elapsed_time": round(time.time() - test_start_time, 5),
+ }
+ )
df = pd.DataFrame(results)
if df.empty:
@@ -144,89 +157,101 @@ def main():
{
"title": "Benchmark Statistics (with correct RTF)",
"stats": {
- "Total tokens processed": df['tokens'].sum(),
- "Total audio generated (s)": df['output_length'].sum(),
- "Total test duration (s)": df['elapsed_time'].max(),
- "Average processing rate (tokens/s)": df['tokens_per_second'].mean(),
- "Average RTF": df['rtf'].mean(),
- "Average Real Time Speed": 1/df['rtf'].mean()
- }
+ "Total tokens processed": df["tokens"].sum(),
+ "Total audio generated (s)": df["output_length"].sum(),
+ "Total test duration (s)": df["elapsed_time"].max(),
+ "Average processing rate (tokens/s)": df["tokens_per_second"].mean(),
+ "Average RTF": df["rtf"].mean(),
+ "Average Real Time Speed": 1 / df["rtf"].mean(),
+ },
},
{
"title": "Per-chunk Stats",
"stats": {
- "Average chunk size (tokens)": df['tokens'].mean(),
- "Min chunk size (tokens)": df['tokens'].min(),
- "Max chunk size (tokens)": df['tokens'].max(),
- "Average processing time (s)": df['processing_time'].mean(),
- "Average output length (s)": df['output_length'].mean()
- }
+ "Average chunk size (tokens)": df["tokens"].mean(),
+ "Min chunk size (tokens)": df["tokens"].min(),
+ "Max chunk size (tokens)": df["tokens"].max(),
+ "Average processing time (s)": df["processing_time"].mean(),
+ "Average output length (s)": df["output_length"].mean(),
+ },
},
{
"title": "Performance Ranges",
"stats": {
"Processing rate range (tokens/s)": f"{df['tokens_per_second'].min():.2f} - {df['tokens_per_second'].max():.2f}",
"RTF range": f"{df['rtf'].min():.2f}x - {df['rtf'].max():.2f}x",
- "Real Time Speed range": f"{1/df['rtf'].max():.2f}x - {1/df['rtf'].min():.2f}x"
- }
- }
+ "Real Time Speed range": f"{1/df['rtf'].max():.2f}x - {1/df['rtf'].min():.2f}x",
+ },
+ },
]
- write_benchmark_stats(stats, prefix_path(output_data_dir, "benchmark_stats_rtf.txt"))
+ write_benchmark_stats(
+ stats, prefix_path(output_data_dir, "benchmark_stats_rtf.txt")
+ )
# Plot Processing Time vs Token Count
plot_correlation(
- df, "tokens", "processing_time",
+ df,
+ "tokens",
+ "processing_time",
"Processing Time vs Input Size",
"Number of Input Tokens",
"Processing Time (seconds)",
- prefix_path(output_plots_dir, "processing_time_rtf.png")
+ prefix_path(output_plots_dir, "processing_time_rtf.png"),
)
# Plot RTF vs Token Count
plot_correlation(
- df, "tokens", "rtf",
+ df,
+ "tokens",
+ "rtf",
"Real-Time Factor vs Input Size",
"Number of Input Tokens",
"Real-Time Factor (processing time / audio length)",
- prefix_path(output_plots_dir, "realtime_factor_rtf.png")
+ prefix_path(output_plots_dir, "realtime_factor_rtf.png"),
)
# Stop monitoring and get final metrics
final_metrics = monitor.stop()
-
+
# Convert metrics timeline to DataFrame for stats
metrics_df = pd.DataFrame(final_metrics)
-
+
# Add system usage stats
if not metrics_df.empty:
- stats.append({
- "title": "System Usage Statistics",
- "stats": {
- "Peak CPU Usage (%)": metrics_df['cpu_percent'].max(),
- "Avg CPU Usage (%)": metrics_df['cpu_percent'].mean(),
- "Peak RAM Usage (%)": metrics_df['ram_percent'].max(),
- "Avg RAM Usage (%)": metrics_df['ram_percent'].mean(),
- "Peak RAM Used (GB)": metrics_df['ram_used_gb'].max(),
- "Avg RAM Used (GB)": metrics_df['ram_used_gb'].mean(),
+ stats.append(
+ {
+ "title": "System Usage Statistics",
+ "stats": {
+ "Peak CPU Usage (%)": metrics_df["cpu_percent"].max(),
+ "Avg CPU Usage (%)": metrics_df["cpu_percent"].mean(),
+ "Peak RAM Usage (%)": metrics_df["ram_percent"].max(),
+ "Avg RAM Usage (%)": metrics_df["ram_percent"].mean(),
+ "Peak RAM Used (GB)": metrics_df["ram_used_gb"].max(),
+ "Avg RAM Used (GB)": metrics_df["ram_used_gb"].mean(),
+ },
}
- })
- if 'gpu_memory_used' in metrics_df:
- stats[-1]["stats"].update({
- "Peak GPU Memory (MB)": metrics_df['gpu_memory_used'].max(),
- "Avg GPU Memory (MB)": metrics_df['gpu_memory_used'].mean(),
- })
-
+ )
+ if "gpu_memory_used" in metrics_df:
+ stats[-1]["stats"].update(
+ {
+ "Peak GPU Memory (MB)": metrics_df["gpu_memory_used"].max(),
+ "Avg GPU Memory (MB)": metrics_df["gpu_memory_used"].mean(),
+ }
+ )
+
# Plot system metrics
- plot_system_metrics(final_metrics, prefix_path(output_plots_dir, "system_usage_rtf.png"))
+ plot_system_metrics(
+ final_metrics, prefix_path(output_plots_dir, "system_usage_rtf.png")
+ )
# Save final results
save_json_results(
{
"results": results,
"system_metrics": final_metrics,
- "test_duration": time.time() - test_start_time
+ "test_duration": time.time() - test_start_time,
},
- prefix_path(output_data_dir, "benchmark_results_rtf.json")
+ prefix_path(output_data_dir, "benchmark_results_rtf.json"),
)
print("\nResults saved to:")
diff --git a/examples/assorted_checks/benchmarks/depr_benchmark_tts.py b/examples/assorted_checks/benchmarks/depr_benchmark_tts.py
index 989c177..6fd600e 100644
--- a/examples/assorted_checks/benchmarks/depr_benchmark_tts.py
+++ b/examples/assorted_checks/benchmarks/depr_benchmark_tts.py
@@ -1,19 +1,30 @@
import os
import json
import time
+
import pandas as pd
-from examples.assorted_checks.lib.shared_plotting import plot_system_metrics, plot_correlation
+
from examples.assorted_checks.lib.shared_utils import (
- get_system_metrics, save_json_results, write_benchmark_stats
+ save_json_results,
+ get_system_metrics,
+ write_benchmark_stats,
+)
+from examples.assorted_checks.lib.shared_plotting import (
+ plot_correlation,
+ plot_system_metrics,
)
from examples.assorted_checks.lib.shared_benchmark_utils import (
- get_text_for_tokens, make_tts_request, generate_token_sizes, enc
+ enc,
+ make_tts_request,
+ get_text_for_tokens,
+ generate_token_sizes,
)
def main():
# Get optional prefix from first command line argument
import sys
+
prefix = sys.argv[1] if len(sys.argv) > 1 else ""
# Set up paths relative to this file
@@ -21,7 +32,7 @@ def main():
output_dir = os.path.join(script_dir, "output_audio")
output_data_dir = os.path.join(script_dir, "output_data")
output_plots_dir = os.path.join(script_dir, "output_plots")
-
+
# Create output directories
os.makedirs(output_dir, exist_ok=True)
os.makedirs(output_data_dir, exist_ok=True)
@@ -43,7 +54,6 @@ def main():
total_tokens = len(enc.encode(text))
print(f"Total tokens in file: {total_tokens}")
-
token_sizes = generate_token_sizes(total_tokens)
print(f"Testing sizes: {token_sizes}")
@@ -85,7 +95,7 @@ def main():
# Save intermediate results
save_json_results(
{"results": results, "system_metrics": system_metrics},
- prefix_path(output_data_dir, "benchmark_results.json")
+ prefix_path(output_data_dir, "benchmark_results.json"),
)
# Create DataFrame and calculate stats
@@ -102,53 +112,59 @@ def main():
{
"title": "Benchmark Statistics",
"stats": {
- "Total tokens processed": df['tokens'].sum(),
- "Total audio generated (s)": df['output_length'].sum(),
- "Total test duration (s)": df['elapsed_time'].max(),
- "Average processing rate (tokens/s)": df['tokens_per_second'].mean(),
- "Average realtime factor": df['realtime_factor'].mean()
- }
+ "Total tokens processed": df["tokens"].sum(),
+ "Total audio generated (s)": df["output_length"].sum(),
+ "Total test duration (s)": df["elapsed_time"].max(),
+ "Average processing rate (tokens/s)": df["tokens_per_second"].mean(),
+ "Average realtime factor": df["realtime_factor"].mean(),
+ },
},
{
"title": "Per-chunk Stats",
"stats": {
- "Average chunk size (tokens)": df['tokens'].mean(),
- "Min chunk size (tokens)": df['tokens'].min(),
- "Max chunk size (tokens)": df['tokens'].max(),
- "Average processing time (s)": df['processing_time'].mean(),
- "Average output length (s)": df['output_length'].mean()
- }
+ "Average chunk size (tokens)": df["tokens"].mean(),
+ "Min chunk size (tokens)": df["tokens"].min(),
+ "Max chunk size (tokens)": df["tokens"].max(),
+ "Average processing time (s)": df["processing_time"].mean(),
+ "Average output length (s)": df["output_length"].mean(),
+ },
},
{
"title": "Performance Ranges",
"stats": {
"Processing rate range (tokens/s)": f"{df['tokens_per_second'].min():.2f} - {df['tokens_per_second'].max():.2f}",
- "Realtime factor range": f"{df['realtime_factor'].min():.2f}x - {df['realtime_factor'].max():.2f}x"
- }
- }
+ "Realtime factor range": f"{df['realtime_factor'].min():.2f}x - {df['realtime_factor'].max():.2f}x",
+ },
+ },
]
write_benchmark_stats(stats, prefix_path(output_data_dir, "benchmark_stats.txt"))
# Plot Processing Time vs Token Count
plot_correlation(
- df, "tokens", "processing_time",
+ df,
+ "tokens",
+ "processing_time",
"Processing Time vs Input Size",
"Number of Input Tokens",
"Processing Time (seconds)",
- prefix_path(output_plots_dir, "processing_time.png")
+ prefix_path(output_plots_dir, "processing_time.png"),
)
# Plot Realtime Factor vs Token Count
plot_correlation(
- df, "tokens", "realtime_factor",
+ df,
+ "tokens",
+ "realtime_factor",
"Realtime Factor vs Input Size",
"Number of Input Tokens",
"Realtime Factor (output length / processing time)",
- prefix_path(output_plots_dir, "realtime_factor.png")
+ prefix_path(output_plots_dir, "realtime_factor.png"),
)
# Plot system metrics
- plot_system_metrics(system_metrics, prefix_path(output_plots_dir, "system_usage.png"))
+ plot_system_metrics(
+ system_metrics, prefix_path(output_plots_dir, "system_usage.png")
+ )
print("\nResults saved to:")
print(f"- {prefix_path(output_data_dir, 'benchmark_results.json')}")
diff --git a/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py b/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py
index c2fd1c4..f44f7eb 100644
--- a/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py
+++ b/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py
@@ -1,11 +1,12 @@
"""Shared utilities specific to TTS benchmarking."""
+
import time
-from typing import List, Optional, Tuple
+from typing import List, Tuple, Optional
import requests
import tiktoken
-from .shared_utils import get_audio_length, save_audio_file
+from .shared_utils import save_audio_file, get_audio_length
# Global tokenizer instance
enc = tiktoken.get_encoding("cl100k_base")
@@ -13,11 +14,11 @@ enc = tiktoken.get_encoding("cl100k_base")
def get_text_for_tokens(text: str, num_tokens: int) -> str:
"""Get a slice of text that contains exactly num_tokens tokens.
-
+
Args:
text: Input text to slice
num_tokens: Desired number of tokens
-
+
Returns:
str: Text slice containing exactly num_tokens tokens
"""
@@ -31,44 +32,69 @@ def make_tts_request(
text: str,
output_dir: str = None,
timeout: int = 1800,
- prefix: str = ""
+ prefix: str = "",
+ stream: bool = True,
) -> Tuple[Optional[float], Optional[float]]:
"""Make TTS request using OpenAI-compatible endpoint.
-
+
Args:
text: Input text to convert to speech
output_dir: Directory to save audio files. If None, audio won't be saved.
timeout: Request timeout in seconds
prefix: Optional prefix for output filenames
-
+
Returns:
tuple: (processing_time, audio_length) in seconds, or (None, None) on error
"""
try:
start_time = time.time()
- response = requests.post(
- "http://localhost:8880/v1/audio/speech",
- json={
- "model": "kokoro",
- "input": text,
- "voice": "af",
- "response_format": "wav",
- },
- timeout=timeout,
- )
- response.raise_for_status()
+ if stream:
+ # For streaming, we need to collect all chunks
+ audio_chunks = []
+ response = requests.post(
+ "http://localhost:8880/v1/audio/speech",
+ json={
+ "model": "kokoro",
+ "input": text,
+ "voice": "af",
+ "response_format": "wav",
+ "stream": True,
+ },
+ timeout=timeout,
+ stream=True,
+ )
+ response.raise_for_status()
+
+ for chunk in response.iter_content(chunk_size=8192):
+ if chunk:
+ audio_chunks.append(chunk)
+
+ # Combine all chunks
+ audio_data = b"".join(audio_chunks)
+ else:
+ response = requests.post(
+ "http://localhost:8880/v1/audio/speech",
+ json={
+ "model": "kokoro",
+ "input": text,
+ "voice": "af",
+ "response_format": "wav",
+ "stream": False,
+ },
+ timeout=timeout,
+ )
+ response.raise_for_status()
+ audio_data = response.content
processing_time = round(time.time() - start_time, 2)
- # Calculate audio length from response content
- audio_length = get_audio_length(response.content)
-
+ # Calculate audio length from audio data
+ audio_length = get_audio_length(audio_data)
+
# Save the audio file if output_dir is provided
if output_dir:
token_count = len(enc.encode(text))
output_file = save_audio_file(
- response.content,
- f"chunk_{token_count}_tokens",
- output_dir
+ audio_data, f"chunk_{token_count}_tokens", output_dir
)
print(f"Saved audio to {output_file}")
@@ -86,26 +112,26 @@ def generate_token_sizes(
max_tokens: int,
dense_step: int = 100,
dense_max: int = 1000,
- sparse_step: int = 1000
+ sparse_step: int = 1000,
) -> List[int]:
"""Generate token size ranges with dense sampling at start.
-
+
Args:
max_tokens: Maximum number of tokens to generate sizes up to
dense_step: Step size for dense sampling range
dense_max: Maximum value for dense sampling
sparse_step: Step size for sparse sampling range
-
+
Returns:
list: Sorted list of token sizes
"""
# Dense sampling at start
dense_range = list(range(dense_step, dense_max + 1, dense_step))
-
+
if max_tokens <= dense_max or sparse_step < dense_max:
return sorted(dense_range)
# Sparse sampling for larger sizes
sparse_range = list(range(dense_max + sparse_step, max_tokens + 1, sparse_step))
-
+
# Combine and deduplicate
return sorted(list(set(dense_range + sparse_range)))
diff --git a/examples/assorted_checks/benchmarks/lib/shared_plotting.py b/examples/assorted_checks/benchmarks/lib/shared_plotting.py
index 18e6e68..6ca4872 100644
--- a/examples/assorted_checks/benchmarks/lib/shared_plotting.py
+++ b/examples/assorted_checks/benchmarks/lib/shared_plotting.py
@@ -1,7 +1,8 @@
"""Shared plotting utilities for benchmarks and tests."""
+
+import numpy as np
import pandas as pd
import seaborn as sns
-import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
@@ -12,66 +13,71 @@ STYLE_CONFIG = {
"secondary_color": "#05d9e8",
"grid_color": "#ffffff",
"text_color": "#ffffff",
- "font_sizes": {
- "title": 16,
- "label": 14,
- "tick": 12,
- "text": 10
- }
+ "font_sizes": {"title": 16, "label": 14, "tick": 12, "text": 10},
}
+
def setup_plot(fig, ax, title, xlabel=None, ylabel=None):
"""Configure plot styling with consistent theme.
-
+
Args:
fig: matplotlib figure object
ax: matplotlib axis object
title: str, plot title
xlabel: str, optional x-axis label
ylabel: str, optional y-axis label
-
+
Returns:
tuple: (fig, ax) with applied styling
"""
# Grid styling
ax.grid(True, linestyle="--", alpha=0.3, color=STYLE_CONFIG["grid_color"])
-
+
# Title and labels
- ax.set_title(title, pad=20,
- fontsize=STYLE_CONFIG["font_sizes"]["title"],
- fontweight="bold",
- color=STYLE_CONFIG["text_color"])
-
+ ax.set_title(
+ title,
+ pad=20,
+ fontsize=STYLE_CONFIG["font_sizes"]["title"],
+ fontweight="bold",
+ color=STYLE_CONFIG["text_color"],
+ )
+
if xlabel:
- ax.set_xlabel(xlabel,
- fontsize=STYLE_CONFIG["font_sizes"]["label"],
- fontweight="medium",
- color=STYLE_CONFIG["text_color"])
+ ax.set_xlabel(
+ xlabel,
+ fontsize=STYLE_CONFIG["font_sizes"]["label"],
+ fontweight="medium",
+ color=STYLE_CONFIG["text_color"],
+ )
if ylabel:
- ax.set_ylabel(ylabel,
- fontsize=STYLE_CONFIG["font_sizes"]["label"],
- fontweight="medium",
- color=STYLE_CONFIG["text_color"])
-
+ ax.set_ylabel(
+ ylabel,
+ fontsize=STYLE_CONFIG["font_sizes"]["label"],
+ fontweight="medium",
+ color=STYLE_CONFIG["text_color"],
+ )
+
# Tick styling
- ax.tick_params(labelsize=STYLE_CONFIG["font_sizes"]["tick"],
- colors=STYLE_CONFIG["text_color"])
-
+ ax.tick_params(
+ labelsize=STYLE_CONFIG["font_sizes"]["tick"], colors=STYLE_CONFIG["text_color"]
+ )
+
# Spine styling
for spine in ax.spines.values():
spine.set_color(STYLE_CONFIG["text_color"])
spine.set_alpha(0.3)
spine.set_linewidth(0.5)
-
+
# Background colors
ax.set_facecolor(STYLE_CONFIG["background_color"])
fig.patch.set_facecolor(STYLE_CONFIG["background_color"])
-
+
return fig, ax
+
def plot_system_metrics(metrics_data, output_path):
"""Create plots for system metrics over time.
-
+
Args:
metrics_data: list of dicts containing system metrics
output_path: str, path to save the output plot
@@ -79,68 +85,118 @@ def plot_system_metrics(metrics_data, output_path):
df = pd.DataFrame(metrics_data)
df["timestamp"] = pd.to_datetime(df["timestamp"])
elapsed_time = (df["timestamp"] - df["timestamp"].iloc[0]).dt.total_seconds()
-
+
# Get baseline values
baseline_cpu = df["cpu_percent"].iloc[0]
baseline_ram = df["ram_used_gb"].iloc[0]
- baseline_gpu = df["gpu_memory_used"].iloc[0] / 1024 if "gpu_memory_used" in df.columns else None
-
+ baseline_gpu = (
+ df["gpu_memory_used"].iloc[0] / 1024
+ if "gpu_memory_used" in df.columns
+ else None
+ )
+
# Convert GPU memory to GB if present
if "gpu_memory_used" in df.columns:
df["gpu_memory_gb"] = df["gpu_memory_used"] / 1024
-
+
plt.style.use("dark_background")
-
+
# Create subplots based on available metrics
has_gpu = "gpu_memory_used" in df.columns
num_plots = 3 if has_gpu else 2
fig, axes = plt.subplots(num_plots, 1, figsize=(15, 5 * num_plots))
fig.patch.set_facecolor(STYLE_CONFIG["background_color"])
-
+
# Smoothing window
window = min(5, len(df) // 2)
-
+
# Plot CPU Usage
smoothed_cpu = df["cpu_percent"].rolling(window=window, center=True).mean()
- sns.lineplot(x=elapsed_time, y=smoothed_cpu, ax=axes[0],
- color=STYLE_CONFIG["primary_color"], linewidth=2)
- axes[0].axhline(y=baseline_cpu, color=STYLE_CONFIG["secondary_color"],
- linestyle="--", alpha=0.5, label="Baseline")
- setup_plot(fig, axes[0], "CPU Usage Over Time",
- xlabel="Time (seconds)", ylabel="CPU Usage (%)")
+ sns.lineplot(
+ x=elapsed_time,
+ y=smoothed_cpu,
+ ax=axes[0],
+ color=STYLE_CONFIG["primary_color"],
+ linewidth=2,
+ )
+ axes[0].axhline(
+ y=baseline_cpu,
+ color=STYLE_CONFIG["secondary_color"],
+ linestyle="--",
+ alpha=0.5,
+ label="Baseline",
+ )
+ setup_plot(
+ fig,
+ axes[0],
+ "CPU Usage Over Time",
+ xlabel="Time (seconds)",
+ ylabel="CPU Usage (%)",
+ )
axes[0].set_ylim(0, max(df["cpu_percent"]) * 1.1)
axes[0].legend()
-
+
# Plot RAM Usage
smoothed_ram = df["ram_used_gb"].rolling(window=window, center=True).mean()
- sns.lineplot(x=elapsed_time, y=smoothed_ram, ax=axes[1],
- color=STYLE_CONFIG["secondary_color"], linewidth=2)
- axes[1].axhline(y=baseline_ram, color=STYLE_CONFIG["primary_color"],
- linestyle="--", alpha=0.5, label="Baseline")
- setup_plot(fig, axes[1], "RAM Usage Over Time",
- xlabel="Time (seconds)", ylabel="RAM Usage (GB)")
+ sns.lineplot(
+ x=elapsed_time,
+ y=smoothed_ram,
+ ax=axes[1],
+ color=STYLE_CONFIG["secondary_color"],
+ linewidth=2,
+ )
+ axes[1].axhline(
+ y=baseline_ram,
+ color=STYLE_CONFIG["primary_color"],
+ linestyle="--",
+ alpha=0.5,
+ label="Baseline",
+ )
+ setup_plot(
+ fig,
+ axes[1],
+ "RAM Usage Over Time",
+ xlabel="Time (seconds)",
+ ylabel="RAM Usage (GB)",
+ )
axes[1].set_ylim(0, max(df["ram_used_gb"]) * 1.1)
axes[1].legend()
-
+
# Plot GPU Memory if available
if has_gpu:
smoothed_gpu = df["gpu_memory_gb"].rolling(window=window, center=True).mean()
- sns.lineplot(x=elapsed_time, y=smoothed_gpu, ax=axes[2],
- color=STYLE_CONFIG["primary_color"], linewidth=2)
- axes[2].axhline(y=baseline_gpu, color=STYLE_CONFIG["secondary_color"],
- linestyle="--", alpha=0.5, label="Baseline")
- setup_plot(fig, axes[2], "GPU Memory Usage Over Time",
- xlabel="Time (seconds)", ylabel="GPU Memory (GB)")
+ sns.lineplot(
+ x=elapsed_time,
+ y=smoothed_gpu,
+ ax=axes[2],
+ color=STYLE_CONFIG["primary_color"],
+ linewidth=2,
+ )
+ axes[2].axhline(
+ y=baseline_gpu,
+ color=STYLE_CONFIG["secondary_color"],
+ linestyle="--",
+ alpha=0.5,
+ label="Baseline",
+ )
+ setup_plot(
+ fig,
+ axes[2],
+ "GPU Memory Usage Over Time",
+ xlabel="Time (seconds)",
+ ylabel="GPU Memory (GB)",
+ )
axes[2].set_ylim(0, max(df["gpu_memory_gb"]) * 1.1)
axes[2].legend()
-
+
plt.tight_layout()
plt.savefig(output_path, dpi=300, bbox_inches="tight")
plt.close()
-def plot_timeline(df, output_path, suffix=""):
+
+def plot_timeline(df, output_path, suffix="", prefix=""):
"""Create timeline plot showing latency for each run.
-
+
Args:
df: pandas DataFrame containing run data with columns:
- target_tokens: number of tokens
@@ -149,124 +205,161 @@ def plot_timeline(df, output_path, suffix=""):
output_path: str, path to save the output plot
"""
plt.style.use("dark_background")
-
+
# Sort by tokens and run number
- df = df.sort_values(['target_tokens', 'run_number'])
-
+ df = df.sort_values(["target_tokens", "run_number"])
+
# Create figure and axis
fig, ax = plt.subplots(figsize=(12, 6))
-
+
# Calculate y positions for each run with tighter grouping
- unique_tokens = sorted(df['target_tokens'].unique())
+ unique_tokens = sorted(df["target_tokens"].unique())
y_positions = {}
current_y = 0
group_spacing = 0.8 # Space between groups
- run_spacing = 0.2 # Space between runs in a group
-
+ run_spacing = 0.2 # Space between runs in a group
+
for tokens in unique_tokens:
- runs = df[df['target_tokens'] == tokens]
+ runs = df[df["target_tokens"] == tokens]
base_y = current_y
for i, (_, run) in enumerate(runs.iterrows()):
- y_positions[(tokens, run['run_number'])] = base_y + (i * run_spacing)
+ y_positions[(tokens, run["run_number"])] = base_y + (i * run_spacing)
current_y = base_y + (len(runs) * run_spacing) + group_spacing
-
+
# Plot bars and points with more transparency
bar_height = 0.15
for _, row in df.iterrows():
- y = y_positions[(row['target_tokens'], row['run_number'])]
- latency = row['time_to_first_chunk']
-
+ y = y_positions[(row["target_tokens"], row["run_number"])]
+ latency = row["time_to_first_chunk"]
+
# Latency bar
- ax.add_patch(patches.Rectangle(
- (0, y - bar_height/2),
- latency,
- bar_height,
- facecolor=STYLE_CONFIG["primary_color"],
- alpha=0.3
- ))
-
+ ax.add_patch(
+ patches.Rectangle(
+ (0, y - bar_height / 2),
+ latency,
+ bar_height,
+ facecolor=STYLE_CONFIG["primary_color"],
+ alpha=0.3,
+ )
+ )
+
# End point
- ax.plot(latency, y, 'o',
- color=STYLE_CONFIG["secondary_color"],
- markersize=4,
- alpha=0.5)
-
+ ax.plot(
+ latency,
+ y,
+ "o",
+ color=STYLE_CONFIG["secondary_color"],
+ markersize=4,
+ alpha=0.5,
+ )
+
# Add mean lines and values for each token group
for tokens in unique_tokens:
- token_runs = df[df['target_tokens'] == tokens]
- mean_latency = token_runs['time_to_first_chunk'].mean()
- y_positions_for_token = [y_positions[(tokens, run['run_number'])] for _, run in token_runs.iterrows()]
+ token_runs = df[df["target_tokens"] == tokens]
+ mean_latency = token_runs["time_to_first_chunk"].mean()
+ y_positions_for_token = [
+ y_positions[(tokens, run["run_number"])] for _, run in token_runs.iterrows()
+ ]
min_y = min(y_positions_for_token)
max_y = max(y_positions_for_token)
group_center = (min_y + max_y) / 2
-
+
# Plot mean line with gradient alpha
gradient = np.linspace(0.2, 0.8, 100)
- for i in range(len(gradient)-1):
- y1 = min_y - bar_height + (max_y - min_y + 2*bar_height) * (i/len(gradient))
- y2 = min_y - bar_height + (max_y - min_y + 2*bar_height) * ((i+1)/len(gradient))
- ax.plot([mean_latency, mean_latency], [y1, y2],
- '-', color=STYLE_CONFIG["secondary_color"],
- linewidth=3, alpha=gradient[i])
-
+ for i in range(len(gradient) - 1):
+ y1 = (
+ min_y
+ - bar_height
+ + (max_y - min_y + 2 * bar_height) * (i / len(gradient))
+ )
+ y2 = (
+ min_y
+ - bar_height
+ + (max_y - min_y + 2 * bar_height) * ((i + 1) / len(gradient))
+ )
+ ax.plot(
+ [mean_latency, mean_latency],
+ [y1, y2],
+ "-",
+ color=STYLE_CONFIG["secondary_color"],
+ linewidth=3,
+ alpha=gradient[i],
+ )
+
# Add mean value label with background
- label_text = f'Mean: {mean_latency:.3f}s'
+ label_text = f"Mean: {mean_latency:.3f}s"
bbox_props = dict(
facecolor=STYLE_CONFIG["background_color"],
edgecolor=STYLE_CONFIG["secondary_color"],
alpha=0.8,
pad=3,
- linewidth=1
+ linewidth=1,
)
- ax.text(mean_latency + 0.02, group_center,
- label_text,
- color=STYLE_CONFIG["secondary_color"],
- va='center',
- fontsize=10,
- fontweight='bold',
- bbox=bbox_props)
-
+ ax.text(
+ mean_latency + 0.02,
+ group_center,
+ label_text,
+ color=STYLE_CONFIG["secondary_color"],
+ va="center",
+ fontsize=10,
+ fontweight="bold",
+ bbox=bbox_props,
+ )
+
# Customize plot
ax.set_ylim(-1, current_y)
- ax.set_xlim(0, df['time_to_first_chunk'].max() * 1.3) # Extra space for labels
-
+ ax.set_xlim(0, df["time_to_first_chunk"].max() * 1.3) # Extra space for labels
+
# Add labels for token groups with tighter spacing
group_positions = {}
for tokens in unique_tokens:
- runs = df[df['target_tokens'] == tokens]
- y_positions_for_token = [y_positions[(tokens, run['run_number'])] for _, run in runs.iterrows()]
- group_positions[tokens] = sum(y_positions_for_token) / len(y_positions_for_token)
- plt.axhline(y=min(y_positions_for_token) - bar_height,
- color='white', alpha=0.1, linestyle='-')
-
+ runs = df[df["target_tokens"] == tokens]
+ y_positions_for_token = [
+ y_positions[(tokens, run["run_number"])] for _, run in runs.iterrows()
+ ]
+ group_positions[tokens] = sum(y_positions_for_token) / len(
+ y_positions_for_token
+ )
+ plt.axhline(
+ y=min(y_positions_for_token) - bar_height,
+ color="white",
+ alpha=0.1,
+ linestyle="-",
+ )
+
# Calculate mean audio length for each token group
audio_lengths = {}
for tokens in unique_tokens:
- token_runs = df[df['target_tokens'] == tokens]
- audio_lengths[tokens] = token_runs['audio_length'].mean()
+ token_runs = df[df["target_tokens"] == tokens]
+ audio_lengths[tokens] = token_runs["audio_length"].mean()
# Set y-ticks at group centers with token counts and audio lengths
plt.yticks(
list(group_positions.values()),
- [f'{tokens} tokens\n({audio_lengths[tokens]:.1f}s)' for tokens in group_positions.keys()],
- fontsize=10
+ [
+ f"{tokens} tokens\n({audio_lengths[tokens]:.1f}s)"
+ for tokens in group_positions.keys()
+ ],
+ fontsize=10,
)
-
+
# Customize appearance
setup_plot(
- fig, ax,
- "Time-To-Audio Latency" + suffix,
+ fig,
+ ax,
+ prefix.upper() + " Time-To-Audio Latency " + suffix,
xlabel="Time (seconds)",
- ylabel="Input Size"
+ ylabel="Input Size",
)
-
+
plt.tight_layout()
plt.savefig(output_path, dpi=300, bbox_inches="tight")
plt.close()
+
def plot_correlation(df, x, y, title, xlabel, ylabel, output_path):
"""Create correlation plot with regression line and correlation coefficient.
-
+
Args:
df: pandas DataFrame containing the data
x: str, column name for x-axis
@@ -277,28 +370,40 @@ def plot_correlation(df, x, y, title, xlabel, ylabel, output_path):
output_path: str, path to save the output plot
"""
plt.style.use("dark_background")
-
+
fig, ax = plt.subplots(figsize=(12, 8))
-
+
# Scatter plot
- sns.scatterplot(data=df, x=x, y=y, s=100, alpha=0.6,
- color=STYLE_CONFIG["primary_color"])
-
+ sns.scatterplot(
+ data=df, x=x, y=y, s=100, alpha=0.6, color=STYLE_CONFIG["primary_color"]
+ )
+
# Regression line
- sns.regplot(data=df, x=x, y=y, scatter=False,
- color=STYLE_CONFIG["secondary_color"],
- line_kws={"linewidth": 2})
-
+ sns.regplot(
+ data=df,
+ x=x,
+ y=y,
+ scatter=False,
+ color=STYLE_CONFIG["secondary_color"],
+ line_kws={"linewidth": 2},
+ )
+
# Add correlation coefficient
corr = df[x].corr(df[y])
- plt.text(0.05, 0.95, f"Correlation: {corr:.2f}",
- transform=ax.transAxes,
- fontsize=STYLE_CONFIG["font_sizes"]["text"],
- color=STYLE_CONFIG["text_color"],
- bbox=dict(facecolor=STYLE_CONFIG["background_color"],
- edgecolor=STYLE_CONFIG["text_color"],
- alpha=0.7))
-
+ plt.text(
+ 0.05,
+ 0.95,
+ f"Correlation: {corr:.2f}",
+ transform=ax.transAxes,
+ fontsize=STYLE_CONFIG["font_sizes"]["text"],
+ color=STYLE_CONFIG["text_color"],
+ bbox=dict(
+ facecolor=STYLE_CONFIG["background_color"],
+ edgecolor=STYLE_CONFIG["text_color"],
+ alpha=0.7,
+ ),
+ )
+
setup_plot(fig, ax, title, xlabel=xlabel, ylabel=ylabel)
plt.savefig(output_path, dpi=300, bbox_inches="tight")
plt.close()
diff --git a/examples/assorted_checks/benchmarks/lib/shared_utils.py b/examples/assorted_checks/benchmarks/lib/shared_utils.py
index a9c872e..1e3fbdb 100644
--- a/examples/assorted_checks/benchmarks/lib/shared_utils.py
+++ b/examples/assorted_checks/benchmarks/lib/shared_utils.py
@@ -1,9 +1,10 @@
"""Shared utilities for benchmarks and tests."""
+
import os
import json
import subprocess
+from typing import Any, Dict, List, Union, Optional
from datetime import datetime
-from typing import Any, Dict, List, Optional, Union
import psutil
import scipy.io.wavfile as wavfile
@@ -12,28 +13,46 @@ import scipy.io.wavfile as wavfile
TORCH_AVAILABLE = False
try:
import torch
+
TORCH_AVAILABLE = torch.cuda.is_available()
except ImportError:
pass
+def check_audio_file_is_silent(audio_path: str, threshold: float = 0.01) -> bool:
+ """Check if an audio file is silent by comparing peak amplitude to a threshold.
+
+ Args:
+ audio_path: Path to the audio file
+ threshold: Peak amplitude threshold for silence
+
+ Returns:
+ bool: True if audio is silent, False otherwise
+ """
+ rate, data = wavfile.read(audio_path)
+ peak_amplitude = max(abs(data.min()), abs(data.max())) / 32768.0 # 16-bit audio
+
+ return peak_amplitude < threshold
+
+
def get_audio_length(audio_data: bytes, temp_dir: str = None) -> float:
"""Get audio length in seconds from bytes data.
-
+
Args:
audio_data: Raw audio bytes
temp_dir: Directory for temporary file. If None, uses system temp directory.
-
+
Returns:
float: Audio length in seconds
"""
if temp_dir is None:
import tempfile
+
temp_dir = tempfile.gettempdir()
-
+
temp_path = os.path.join(temp_dir, "temp.wav")
os.makedirs(temp_dir, exist_ok=True)
-
+
with open(temp_path, "wb") as f:
f.write(audio_data)
@@ -47,11 +66,11 @@ def get_audio_length(audio_data: bytes, temp_dir: str = None) -> float:
def get_gpu_memory(average: bool = True) -> Optional[Union[float, List[float]]]:
"""Get GPU memory usage using PyTorch if available, falling back to nvidia-smi.
-
+
Args:
average: If True and multiple GPUs present, returns average memory usage.
If False, returns list of memory usage per GPU.
-
+
Returns:
float or List[float] or None: GPU memory usage in MB. Returns None if no GPU available.
If average=False and multiple GPUs present, returns list of values.
@@ -60,19 +79,23 @@ def get_gpu_memory(average: bool = True) -> Optional[Union[float, List[float]]]:
n_gpus = torch.cuda.device_count()
memory_used = []
for i in range(n_gpus):
- memory_used.append(torch.cuda.memory_allocated(i) / 1024**2) # Convert to MB
-
+ memory_used.append(
+ torch.cuda.memory_allocated(i) / 1024**2
+ ) # Convert to MB
+
if average and len(memory_used) > 0:
return sum(memory_used) / len(memory_used)
return memory_used if len(memory_used) > 1 else memory_used[0]
-
+
# Fall back to nvidia-smi
try:
result = subprocess.check_output(
["nvidia-smi", "--query-gpu=memory.used", "--format=csv,nounits,noheader"]
)
- memory_values = [float(x.strip()) for x in result.decode("utf-8").split("\n") if x.strip()]
-
+ memory_values = [
+ float(x.strip()) for x in result.decode("utf-8").split("\n") if x.strip()
+ ]
+
if average and len(memory_values) > 0:
return sum(memory_values) / len(memory_values)
return memory_values if len(memory_values) > 1 else memory_values[0]
@@ -82,14 +105,14 @@ def get_gpu_memory(average: bool = True) -> Optional[Union[float, List[float]]]:
def get_system_metrics() -> Dict[str, Union[str, float]]:
"""Get current system metrics including CPU, RAM, and GPU if available.
-
+
Returns:
dict: System metrics including timestamp, CPU%, RAM%, RAM GB, and GPU MB if available
"""
# Get per-CPU percentages and calculate average
cpu_percentages = psutil.cpu_percent(percpu=True)
avg_cpu = sum(cpu_percentages) / len(cpu_percentages)
-
+
metrics = {
"timestamp": datetime.now().isoformat(),
"cpu_percent": round(avg_cpu, 2),
@@ -106,40 +129,40 @@ def get_system_metrics() -> Dict[str, Union[str, float]]:
def save_audio_file(audio_data: bytes, identifier: str, output_dir: str) -> str:
"""Save audio data to a file with proper naming and directory creation.
-
+
Args:
audio_data: Raw audio bytes
identifier: String to identify this audio file (e.g. token count, test name)
output_dir: Directory to save the file
-
+
Returns:
str: Path to the saved audio file
"""
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, f"{identifier}.wav")
-
+
with open(output_file, "wb") as f:
f.write(audio_data)
-
+
return output_file
def write_benchmark_stats(stats: List[Dict[str, Any]], output_file: str) -> None:
"""Write benchmark statistics to a file in a clean, organized format.
-
+
Args:
stats: List of dictionaries containing stat name/value pairs
output_file: Path to output file
"""
os.makedirs(os.path.dirname(output_file), exist_ok=True)
-
+
with open(output_file, "w") as f:
for section in stats:
# Write section header
f.write(f"=== {section['title']} ===\n\n")
-
+
# Write stats
- for label, value in section['stats'].items():
+ for label, value in section["stats"].items():
if isinstance(value, float):
f.write(f"{label}: {value:.2f}\n")
else:
@@ -149,7 +172,7 @@ def write_benchmark_stats(stats: List[Dict[str, Any]], output_file: str) -> None
def save_json_results(results: Dict[str, Any], output_file: str) -> None:
"""Save benchmark results to a JSON file with proper formatting.
-
+
Args:
results: Dictionary of results to save
output_file: Path to output file
@@ -159,14 +182,16 @@ def save_json_results(results: Dict[str, Any], output_file: str) -> None:
json.dump(results, f, indent=2)
-def real_time_factor(processing_time: float, audio_length: float, decimals: int = 2) -> float:
+def real_time_factor(
+ processing_time: float, audio_length: float, decimals: int = 2
+) -> float:
"""Calculate Real-Time Factor (RTF) as processing-time / length-of-audio.
-
+
Args:
processing_time: Time taken to process/generate audio
audio_length: Length of the generated audio
decimals: Number of decimal places to round to
-
+
Returns:
float: RTF value
"""
diff --git a/examples/assorted_checks/benchmarks/lib/stream_utils.py b/examples/assorted_checks/benchmarks/lib/stream_utils.py
new file mode 100644
index 0000000..623b18a
--- /dev/null
+++ b/examples/assorted_checks/benchmarks/lib/stream_utils.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+import os
+import time
+import wave
+from typing import Any, Dict, List, Callable, Optional
+
+import pandas as pd
+import scipy.io.wavfile as wavfile
+
+from .shared_utils import save_json_results
+from .shared_plotting import plot_timeline, plot_correlation
+from .shared_benchmark_utils import enc, get_text_for_tokens
+
+
+def check_audio_silence(audio_path: str) -> bool:
+ """Check if audio file contains only silence"""
+ sample_rate, audio_data = wavfile.read(audio_path)
+ # Convert to float for RMS calculation
+ audio_float = audio_data.astype(float)
+ # Calculate RMS value
+ rms = (audio_float**2).mean() ** 0.5
+ # Define silence threshold (adjust if needed)
+ SILENCE_THRESHOLD = 50.0
+ return rms < SILENCE_THRESHOLD
+
+
+def process_benchmark_results(
+ all_results: List[Dict[str, Any]], token_sizes: List[int]
+) -> Dict[str, Any]:
+ """Process benchmark results and generate summary"""
+ summary = {}
+ for tokens in token_sizes:
+ matching_results = [
+ r for r in all_results if r["target_tokens"] == tokens and not r["error"]
+ ]
+ if matching_results:
+ avg_first_chunk = sum(
+ r["time_to_first_chunk"] for r in matching_results
+ ) / len(matching_results)
+ avg_total = sum(r["total_time"] for r in matching_results) / len(
+ matching_results
+ )
+ avg_audio_length = sum(r["audio_length"] for r in matching_results) / len(
+ matching_results
+ )
+ summary[tokens] = {
+ "avg_time_to_first_chunk": round(avg_first_chunk, 3),
+ "avg_total_time": round(avg_total, 3),
+ "avg_audio_length": round(avg_audio_length, 3),
+ "num_successful_runs": len(matching_results),
+ }
+ return summary
+
+
+def save_benchmark_results(
+ all_results: List[Dict[str, Any]],
+ summary: Dict[str, Any],
+ output_data_dir: str,
+ output_plots_dir: str,
+ suffix: str,
+ plot_title_suffix: str,
+ prefix: str = "",
+):
+ """Save benchmark results and generate plots"""
+ # Save results
+ results_data = {
+ "individual_runs": all_results,
+ "summary": summary,
+ "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
+ }
+ save_json_results(
+ results_data,
+ os.path.join(output_data_dir, f"{prefix}first_token_benchmark{suffix}.json"),
+ )
+
+ # Create DataFrame for plotting
+ df = pd.DataFrame(all_results)
+
+ # Create plots
+ plot_correlation(
+ df,
+ "target_tokens",
+ "time_to_first_chunk",
+ f"Time to First Audio vs Input Size {plot_title_suffix}",
+ "Number of Input Tokens",
+ "Time to First Audio (seconds)",
+ os.path.join(output_plots_dir, f"{prefix}first_token_latency{suffix}.png"),
+ )
+
+ plot_correlation(
+ df,
+ "target_tokens",
+ "total_time",
+ f"Total Time vs Input Size {plot_title_suffix}",
+ "Number of Input Tokens",
+ "Total Time (seconds)",
+ os.path.join(output_plots_dir, f"{prefix}total_time_latency{suffix}.png"),
+ )
+
+ plot_timeline(
+ df,
+ os.path.join(output_plots_dir, f"{prefix}first_token_timeline{suffix}.png"),
+ suffix=plot_title_suffix,
+ )
+
+
+def run_benchmark(
+ measure_func: Callable,
+ output_dir: str,
+ output_data_dir: str,
+ output_plots_dir: str,
+ suffix: str = "",
+ plot_title_suffix: str = "",
+ num_runs: int = 5,
+ client=None,
+ prefix="",
+):
+ """Run benchmark with the given measurement function"""
+ # Create output directories
+ os.makedirs(output_dir, exist_ok=True)
+ os.makedirs(output_data_dir, exist_ok=True)
+ os.makedirs(output_plots_dir, exist_ok=True)
+
+ # Load sample text
+ script_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ with open(
+ os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8"
+ ) as f:
+ text = f.read()
+
+ # Test specific token counts
+ token_sizes = [10, 50, 100, 250, 500]
+ all_results = []
+ silent_files = []
+
+ for tokens in token_sizes:
+ print(
+ f"\nTesting {tokens} tokens{' ' + plot_title_suffix if plot_title_suffix else ''}"
+ )
+ test_text = get_text_for_tokens(text, tokens)
+ actual_tokens = len(enc.encode(test_text))
+ print(f"Text preview: {test_text[:50]}...")
+
+ for i in range(num_runs):
+ print(f"Run {i+1}/{num_runs}...")
+ result = measure_func(test_text, output_dir, tokens, i + 1)
+ result["target_tokens"] = tokens
+ result["actual_tokens"] = actual_tokens
+ result["run_number"] = i + 1
+
+ # Handle time to first audio
+ first_chunk = result.get('time_to_first_chunk')
+ print(
+ f"Time to First Audio: {f'{first_chunk:.3f}s' if first_chunk is not None else 'N/A'}"
+ )
+
+ # Handle total time
+ total_time = result.get('total_time')
+ print(
+ f"Time to Save Complete: {f'{total_time:.3f}s' if total_time is not None else 'N/A'}"
+ )
+
+ # Handle audio length
+ audio_length = result.get('audio_length')
+ print(
+ f"Audio length: {f'{audio_length:.3f}s' if audio_length is not None else 'N/A'}"
+ )
+ # Calculate streaming overhead only if both values exist
+ if total_time is not None and first_chunk is not None:
+ print(f"Streaming overhead: {(total_time - first_chunk):.3f}s")
+ else:
+ print("Streaming overhead: N/A")
+
+ if result["error"]:
+ print(f"Error: {result['error']}")
+ elif result["audio_path"] and check_audio_silence(result["audio_path"]):
+ silent_files.append(result["audio_path"])
+
+ all_results.append(result)
+
+ # Process and save results
+ summary = process_benchmark_results(all_results, token_sizes)
+ save_benchmark_results(
+ all_results,
+ summary,
+ output_data_dir,
+ output_plots_dir,
+ suffix,
+ plot_title_suffix,
+ )
+
+ # Print paths
+ print("\nResults and plots saved to:")
+ print(f"- {os.path.join(output_data_dir, f'{prefix}first_token_benchmark{suffix}.json')}")
+ print(f"- {os.path.join(output_plots_dir, f'{prefix}first_token_latency{suffix}.png')}")
+ print(f"- {os.path.join(output_plots_dir, f'{prefix}total_time_latency{suffix}.png')}")
+ print(f"- {os.path.join(output_plots_dir, f'{prefix}first_token_timeline{suffix}.png')}")
+
+ # Print silence check summary
+ if silent_files:
+ print("\nWARNING: The following files contain only silence:")
+ for file in silent_files:
+ print(f"- {file}")
+ else:
+ print("\nAll generated audio files contain valid audio content.")
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results.json
deleted file mode 100644
index 5c60933..0000000
--- a/examples/assorted_checks/benchmarks/output_data/benchmark_results.json
+++ /dev/null
@@ -1,111 +0,0 @@
-{
- "results": [
- {
- "tokens": 100,
- "processing_time": 18.833295583724976,
- "output_length": 31.15,
- "realtime_factor": 1.6539856161403135,
- "elapsed_time": 19.024322748184204
- },
- {
- "tokens": 200,
- "processing_time": 38.95506024360657,
- "output_length": 62.6,
- "realtime_factor": 1.6069799304257042,
- "elapsed_time": 58.21527123451233
- },
- {
- "tokens": 300,
- "processing_time": 49.74252939224243,
- "output_length": 96.325,
- "realtime_factor": 1.9364716908630366,
- "elapsed_time": 108.19673728942871
- },
- {
- "tokens": 400,
- "processing_time": 61.349056243896484,
- "output_length": 128.575,
- "realtime_factor": 2.095794261102292,
- "elapsed_time": 169.733656167984
- },
- {
- "tokens": 500,
- "processing_time": 82.86568236351013,
- "output_length": 158.575,
- "realtime_factor": 1.9136389815071193,
- "elapsed_time": 252.7968451976776
- }
- ],
- "system_metrics": [
- {
- "timestamp": "2025-01-03T00:13:49.865330",
- "cpu_percent": 8.0,
- "ram_percent": 39.4,
- "ram_used_gb": 25.03811264038086,
- "gpu_memory_used": 1204.0
- },
- {
- "timestamp": "2025-01-03T00:14:08.781551",
- "cpu_percent": 26.8,
- "ram_percent": 42.6,
- "ram_used_gb": 27.090862274169922,
- "gpu_memory_used": 1225.0
- },
- {
- "timestamp": "2025-01-03T00:14:08.916973",
- "cpu_percent": 16.1,
- "ram_percent": 42.6,
- "ram_used_gb": 27.089553833007812,
- "gpu_memory_used": 1225.0
- },
- {
- "timestamp": "2025-01-03T00:14:47.979053",
- "cpu_percent": 31.5,
- "ram_percent": 43.6,
- "ram_used_gb": 27.714427947998047,
- "gpu_memory_used": 1225.0
- },
- {
- "timestamp": "2025-01-03T00:14:48.098976",
- "cpu_percent": 20.0,
- "ram_percent": 43.6,
- "ram_used_gb": 27.704315185546875,
- "gpu_memory_used": 1211.0
- },
- {
- "timestamp": "2025-01-03T00:15:37.944729",
- "cpu_percent": 29.7,
- "ram_percent": 38.6,
- "ram_used_gb": 24.53925323486328,
- "gpu_memory_used": 1217.0
- },
- {
- "timestamp": "2025-01-03T00:15:38.071915",
- "cpu_percent": 8.6,
- "ram_percent": 38.5,
- "ram_used_gb": 24.51690673828125,
- "gpu_memory_used": 1208.0
- },
- {
- "timestamp": "2025-01-03T00:16:39.525449",
- "cpu_percent": 23.4,
- "ram_percent": 38.8,
- "ram_used_gb": 24.71230697631836,
- "gpu_memory_used": 1221.0
- },
- {
- "timestamp": "2025-01-03T00:16:39.612442",
- "cpu_percent": 5.5,
- "ram_percent": 38.9,
- "ram_used_gb": 24.72066879272461,
- "gpu_memory_used": 1221.0
- },
- {
- "timestamp": "2025-01-03T00:18:02.569076",
- "cpu_percent": 27.4,
- "ram_percent": 39.1,
- "ram_used_gb": 24.868202209472656,
- "gpu_memory_used": 1264.0
- }
- ]
-}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json
deleted file mode 100644
index 52f8f04..0000000
--- a/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json
+++ /dev/null
@@ -1,216 +0,0 @@
-{
- "results": [
- {
- "tokens": 100,
- "processing_time": 14.349808931350708,
- "output_length": 31.15,
- "rtf": 0.46,
- "elapsed_time": 14.716031074523926
- },
- {
- "tokens": 200,
- "processing_time": 28.341803312301636,
- "output_length": 62.6,
- "rtf": 0.45,
- "elapsed_time": 43.44207406044006
- },
- {
- "tokens": 300,
- "processing_time": 43.352553606033325,
- "output_length": 96.325,
- "rtf": 0.45,
- "elapsed_time": 87.26906609535217
- },
- {
- "tokens": 400,
- "processing_time": 71.02449822425842,
- "output_length": 128.575,
- "rtf": 0.55,
- "elapsed_time": 158.7198133468628
- },
- {
- "tokens": 500,
- "processing_time": 70.92521691322327,
- "output_length": 158.575,
- "rtf": 0.45,
- "elapsed_time": 230.01379895210266
- },
- {
- "tokens": 600,
- "processing_time": 83.6328592300415,
- "output_length": 189.25,
- "rtf": 0.44,
- "elapsed_time": 314.02610969543457
- },
- {
- "tokens": 700,
- "processing_time": 103.0810194015503,
- "output_length": 222.075,
- "rtf": 0.46,
- "elapsed_time": 417.5678551197052
- },
- {
- "tokens": 800,
- "processing_time": 127.02162909507751,
- "output_length": 253.85,
- "rtf": 0.5,
- "elapsed_time": 545.0128681659698
- },
- {
- "tokens": 900,
- "processing_time": 130.49781227111816,
- "output_length": 283.775,
- "rtf": 0.46,
- "elapsed_time": 675.8943417072296
- },
- {
- "tokens": 1000,
- "processing_time": 154.76425909996033,
- "output_length": 315.475,
- "rtf": 0.49,
- "elapsed_time": 831.0677945613861
- }
- ],
- "system_metrics": [
- {
- "timestamp": "2025-01-03T00:23:52.896889",
- "cpu_percent": 4.5,
- "ram_percent": 39.1,
- "ram_used_gb": 24.86032485961914,
- "gpu_memory_used": 1281.0
- },
- {
- "timestamp": "2025-01-03T00:24:07.429461",
- "cpu_percent": 4.5,
- "ram_percent": 39.1,
- "ram_used_gb": 24.847564697265625,
- "gpu_memory_used": 1285.0
- },
- {
- "timestamp": "2025-01-03T00:24:07.620587",
- "cpu_percent": 2.7,
- "ram_percent": 39.1,
- "ram_used_gb": 24.846607208251953,
- "gpu_memory_used": 1275.0
- },
- {
- "timestamp": "2025-01-03T00:24:36.140754",
- "cpu_percent": 5.4,
- "ram_percent": 39.1,
- "ram_used_gb": 24.857810974121094,
- "gpu_memory_used": 1267.0
- },
- {
- "timestamp": "2025-01-03T00:24:36.340675",
- "cpu_percent": 6.2,
- "ram_percent": 39.1,
- "ram_used_gb": 24.85773468017578,
- "gpu_memory_used": 1267.0
- },
- {
- "timestamp": "2025-01-03T00:25:19.905634",
- "cpu_percent": 29.1,
- "ram_percent": 39.2,
- "ram_used_gb": 24.920318603515625,
- "gpu_memory_used": 1256.0
- },
- {
- "timestamp": "2025-01-03T00:25:20.182219",
- "cpu_percent": 20.0,
- "ram_percent": 39.2,
- "ram_used_gb": 24.930198669433594,
- "gpu_memory_used": 1256.0
- },
- {
- "timestamp": "2025-01-03T00:26:31.414760",
- "cpu_percent": 5.3,
- "ram_percent": 39.5,
- "ram_used_gb": 25.127891540527344,
- "gpu_memory_used": 1259.0
- },
- {
- "timestamp": "2025-01-03T00:26:31.617256",
- "cpu_percent": 3.6,
- "ram_percent": 39.5,
- "ram_used_gb": 25.126346588134766,
- "gpu_memory_used": 1252.0
- },
- {
- "timestamp": "2025-01-03T00:27:42.736097",
- "cpu_percent": 10.5,
- "ram_percent": 39.5,
- "ram_used_gb": 25.100231170654297,
- "gpu_memory_used": 1249.0
- },
- {
- "timestamp": "2025-01-03T00:27:42.912870",
- "cpu_percent": 5.3,
- "ram_percent": 39.5,
- "ram_used_gb": 25.098285675048828,
- "gpu_memory_used": 1249.0
- },
- {
- "timestamp": "2025-01-03T00:29:06.725264",
- "cpu_percent": 8.9,
- "ram_percent": 39.5,
- "ram_used_gb": 25.123123168945312,
- "gpu_memory_used": 1239.0
- },
- {
- "timestamp": "2025-01-03T00:29:06.928826",
- "cpu_percent": 5.5,
- "ram_percent": 39.5,
- "ram_used_gb": 25.128646850585938,
- "gpu_memory_used": 1239.0
- },
- {
- "timestamp": "2025-01-03T00:30:50.206349",
- "cpu_percent": 49.6,
- "ram_percent": 39.6,
- "ram_used_gb": 25.162948608398438,
- "gpu_memory_used": 1245.0
- },
- {
- "timestamp": "2025-01-03T00:30:50.491837",
- "cpu_percent": 14.8,
- "ram_percent": 39.5,
- "ram_used_gb": 25.13379669189453,
- "gpu_memory_used": 1245.0
- },
- {
- "timestamp": "2025-01-03T00:32:57.721467",
- "cpu_percent": 6.2,
- "ram_percent": 39.6,
- "ram_used_gb": 25.187721252441406,
- "gpu_memory_used": 1384.0
- },
- {
- "timestamp": "2025-01-03T00:32:57.913350",
- "cpu_percent": 3.6,
- "ram_percent": 39.6,
- "ram_used_gb": 25.199390411376953,
- "gpu_memory_used": 1384.0
- },
- {
- "timestamp": "2025-01-03T00:35:08.608730",
- "cpu_percent": 6.3,
- "ram_percent": 39.8,
- "ram_used_gb": 25.311710357666016,
- "gpu_memory_used": 1330.0
- },
- {
- "timestamp": "2025-01-03T00:35:08.791851",
- "cpu_percent": 5.3,
- "ram_percent": 39.8,
- "ram_used_gb": 25.326683044433594,
- "gpu_memory_used": 1333.0
- },
- {
- "timestamp": "2025-01-03T00:37:43.782406",
- "cpu_percent": 6.8,
- "ram_percent": 40.6,
- "ram_used_gb": 25.803058624267578,
- "gpu_memory_used": 1409.0
- }
- ]
-}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json
deleted file mode 100644
index 59ad009..0000000
--- a/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json
+++ /dev/null
@@ -1,300 +0,0 @@
-{
- "results": [
- {
- "tokens": 100,
- "processing_time": 0.96,
- "output_length": 31.1,
- "rtf": 0.03,
- "elapsed_time": 1.11
- },
- {
- "tokens": 250,
- "processing_time": 2.23,
- "output_length": 77.17,
- "rtf": 0.03,
- "elapsed_time": 3.49
- },
- {
- "tokens": 400,
- "processing_time": 4.05,
- "output_length": 128.05,
- "rtf": 0.03,
- "elapsed_time": 7.77
- },
- {
- "tokens": 550,
- "processing_time": 4.06,
- "output_length": 171.45,
- "rtf": 0.02,
- "elapsed_time": 12.0
- },
- {
- "tokens": 700,
- "processing_time": 6.01,
- "output_length": 221.6,
- "rtf": 0.03,
- "elapsed_time": 18.16
- },
- {
- "tokens": 850,
- "processing_time": 6.9,
- "output_length": 269.1,
- "rtf": 0.03,
- "elapsed_time": 25.21
- },
- {
- "tokens": 1000,
- "processing_time": 7.65,
- "output_length": 315.05,
- "rtf": 0.02,
- "elapsed_time": 33.03
- },
- {
- "tokens": 6000,
- "processing_time": 48.7,
- "output_length": 1837.1,
- "rtf": 0.03,
- "elapsed_time": 82.21
- },
- {
- "tokens": 11000,
- "processing_time": 92.44,
- "output_length": 3388.57,
- "rtf": 0.03,
- "elapsed_time": 175.46
- },
- {
- "tokens": 16000,
- "processing_time": 163.61,
- "output_length": 4977.32,
- "rtf": 0.03,
- "elapsed_time": 340.46
- },
- {
- "tokens": 21000,
- "processing_time": 209.72,
- "output_length": 6533.3,
- "rtf": 0.03,
- "elapsed_time": 551.92
- },
- {
- "tokens": 26000,
- "processing_time": 329.35,
- "output_length": 8068.15,
- "rtf": 0.04,
- "elapsed_time": 883.37
- },
- {
- "tokens": 31000,
- "processing_time": 473.52,
- "output_length": 9611.48,
- "rtf": 0.05,
- "elapsed_time": 1359.28
- },
- {
- "tokens": 36000,
- "processing_time": 650.98,
- "output_length": 11157.15,
- "rtf": 0.06,
- "elapsed_time": 2012.9
- }
- ],
- "system_metrics": [
- {
- "timestamp": "2025-01-03T14:41:01.331735",
- "cpu_percent": 7.5,
- "ram_percent": 50.2,
- "ram_used_gb": 31.960269927978516,
- "gpu_memory_used": 3191.0
- },
- {
- "timestamp": "2025-01-03T14:41:02.357116",
- "cpu_percent": 17.01,
- "ram_percent": 50.2,
- "ram_used_gb": 31.96163558959961,
- "gpu_memory_used": 3426.0
- },
- {
- "timestamp": "2025-01-03T14:41:02.445009",
- "cpu_percent": 9.5,
- "ram_percent": 50.3,
- "ram_used_gb": 31.966781616210938,
- "gpu_memory_used": 3426.0
- },
- {
- "timestamp": "2025-01-03T14:41:04.742152",
- "cpu_percent": 18.27,
- "ram_percent": 50.4,
- "ram_used_gb": 32.08788299560547,
- "gpu_memory_used": 3642.0
- },
- {
- "timestamp": "2025-01-03T14:41:04.847795",
- "cpu_percent": 16.27,
- "ram_percent": 50.5,
- "ram_used_gb": 32.094364166259766,
- "gpu_memory_used": 3640.0
- },
- {
- "timestamp": "2025-01-03T14:41:09.019590",
- "cpu_percent": 15.97,
- "ram_percent": 50.7,
- "ram_used_gb": 32.23244094848633,
- "gpu_memory_used": 3640.0
- },
- {
- "timestamp": "2025-01-03T14:41:09.110324",
- "cpu_percent": 3.54,
- "ram_percent": 50.7,
- "ram_used_gb": 32.234458923339844,
- "gpu_memory_used": 3640.0
- },
- {
- "timestamp": "2025-01-03T14:41:13.252607",
- "cpu_percent": 13.4,
- "ram_percent": 50.6,
- "ram_used_gb": 32.194271087646484,
- "gpu_memory_used": 3935.0
- },
- {
- "timestamp": "2025-01-03T14:41:13.327557",
- "cpu_percent": 4.69,
- "ram_percent": 50.6,
- "ram_used_gb": 32.191776275634766,
- "gpu_memory_used": 3935.0
- },
- {
- "timestamp": "2025-01-03T14:41:19.413633",
- "cpu_percent": 12.92,
- "ram_percent": 50.9,
- "ram_used_gb": 32.3467903137207,
- "gpu_memory_used": 4250.0
- },
- {
- "timestamp": "2025-01-03T14:41:19.492758",
- "cpu_percent": 7.5,
- "ram_percent": 50.8,
- "ram_used_gb": 32.34375,
- "gpu_memory_used": 4250.0
- },
- {
- "timestamp": "2025-01-03T14:41:26.467284",
- "cpu_percent": 13.09,
- "ram_percent": 51.2,
- "ram_used_gb": 32.56281280517578,
- "gpu_memory_used": 4249.0
- },
- {
- "timestamp": "2025-01-03T14:41:26.553559",
- "cpu_percent": 8.39,
- "ram_percent": 51.2,
- "ram_used_gb": 32.56183624267578,
- "gpu_memory_used": 4249.0
- },
- {
- "timestamp": "2025-01-03T14:41:34.284362",
- "cpu_percent": 12.61,
- "ram_percent": 51.7,
- "ram_used_gb": 32.874778747558594,
- "gpu_memory_used": 4250.0
- },
- {
- "timestamp": "2025-01-03T14:41:34.362353",
- "cpu_percent": 1.25,
- "ram_percent": 51.7,
- "ram_used_gb": 32.87461471557617,
- "gpu_memory_used": 4250.0
- },
- {
- "timestamp": "2025-01-03T14:42:23.471312",
- "cpu_percent": 11.64,
- "ram_percent": 54.9,
- "ram_used_gb": 34.90264129638672,
- "gpu_memory_used": 4647.0
- },
- {
- "timestamp": "2025-01-03T14:42:23.547203",
- "cpu_percent": 5.31,
- "ram_percent": 54.9,
- "ram_used_gb": 34.91563415527344,
- "gpu_memory_used": 4647.0
- },
- {
- "timestamp": "2025-01-03T14:43:56.724933",
- "cpu_percent": 12.97,
- "ram_percent": 59.5,
- "ram_used_gb": 37.84241485595703,
- "gpu_memory_used": 4655.0
- },
- {
- "timestamp": "2025-01-03T14:43:56.815453",
- "cpu_percent": 11.75,
- "ram_percent": 59.5,
- "ram_used_gb": 37.832679748535156,
- "gpu_memory_used": 4655.0
- },
- {
- "timestamp": "2025-01-03T14:46:41.705155",
- "cpu_percent": 12.94,
- "ram_percent": 66.3,
- "ram_used_gb": 42.1534538269043,
- "gpu_memory_used": 4729.0
- },
- {
- "timestamp": "2025-01-03T14:46:41.835177",
- "cpu_percent": 7.73,
- "ram_percent": 66.2,
- "ram_used_gb": 42.13554000854492,
- "gpu_memory_used": 4729.0
- },
- {
- "timestamp": "2025-01-03T14:50:13.166236",
- "cpu_percent": 11.62,
- "ram_percent": 73.4,
- "ram_used_gb": 46.71288299560547,
- "gpu_memory_used": 4676.0
- },
- {
- "timestamp": "2025-01-03T14:50:13.261611",
- "cpu_percent": 8.16,
- "ram_percent": 73.4,
- "ram_used_gb": 46.71356201171875,
- "gpu_memory_used": 4676.0
- },
- {
- "timestamp": "2025-01-03T14:55:44.623607",
- "cpu_percent": 12.92,
- "ram_percent": 82.8,
- "ram_used_gb": 52.65533447265625,
- "gpu_memory_used": 4636.0
- },
- {
- "timestamp": "2025-01-03T14:55:44.735410",
- "cpu_percent": 15.29,
- "ram_percent": 82.7,
- "ram_used_gb": 52.63290786743164,
- "gpu_memory_used": 4636.0
- },
- {
- "timestamp": "2025-01-03T15:03:40.534449",
- "cpu_percent": 13.88,
- "ram_percent": 85.0,
- "ram_used_gb": 54.050071716308594,
- "gpu_memory_used": 4771.0
- },
- {
- "timestamp": "2025-01-03T15:03:40.638708",
- "cpu_percent": 12.21,
- "ram_percent": 85.0,
- "ram_used_gb": 54.053733825683594,
- "gpu_memory_used": 4771.0
- },
- {
- "timestamp": "2025-01-03T15:14:34.159142",
- "cpu_percent": 14.51,
- "ram_percent": 78.1,
- "ram_used_gb": 49.70396423339844,
- "gpu_memory_used": 4739.0
- }
- ]
-}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt
deleted file mode 100644
index 010d116..0000000
--- a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-=== Benchmark Statistics (with correct RTF) ===
-
-Overall Stats:
-Total tokens processed: 5500
-Total audio generated: 1741.65s
-Total test duration: 831.07s
-Average processing rate: 6.72 tokens/second
-Average RTF: 0.47x
-
-Per-chunk Stats:
-Average chunk size: 550.00 tokens
-Min chunk size: 100.00 tokens
-Max chunk size: 1000.00 tokens
-Average processing time: 82.70s
-Average output length: 174.17s
-
-Performance Ranges:
-Processing rate range: 5.63 - 7.17 tokens/second
-RTF range: 0.44x - 0.55x
diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt
deleted file mode 100644
index e7bed5f..0000000
--- a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-=== Benchmark Statistics (with correct RTF) ===
-
-Overall Stats:
-Total tokens processed: 150850
-Total audio generated: 46786.59s
-Total test duration: 2012.90s
-Average processing rate: 104.34 tokens/second
-Average RTF: 0.03x
-
diff --git a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json
deleted file mode 100644
index edcb334..0000000
--- a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json
+++ /dev/null
@@ -1,1804 +0,0 @@
-{
- "results": [
- {
- "tokens": 300,
- "processing_time": 41.62,
- "output_length": 96.425,
- "rtf": 0.43,
- "elapsed_time": 41.68
- },
- {
- "tokens": 600,
- "processing_time": 81.72,
- "output_length": 188.675,
- "rtf": 0.43,
- "elapsed_time": 123.49
- },
- {
- "tokens": 900,
- "processing_time": 120.55,
- "output_length": 283.425,
- "rtf": 0.43,
- "elapsed_time": 244.1
- }
- ],
- "system_metrics": [
- {
- "timestamp": "2025-01-04T01:30:26.991154",
- "cpu_percent": 7.83,
- "ram_percent": 48.2,
- "ram_used_gb": 30.669906616210938,
- "gpu_memory_used": 1243.0,
- "relative_time": 0.07800030708312988
- },
- {
- "timestamp": "2025-01-04T01:30:28.079669",
- "cpu_percent": 59.43,
- "ram_percent": 48.2,
- "ram_used_gb": 30.675106048583984,
- "gpu_memory_used": 1244.0,
- "relative_time": 1.1842052936553955
- },
- {
- "timestamp": "2025-01-04T01:30:29.185881",
- "cpu_percent": 57.14,
- "ram_percent": 47.9,
- "ram_used_gb": 30.473060607910156,
- "gpu_memory_used": 1246.0,
- "relative_time": 2.31345796585083
- },
- {
- "timestamp": "2025-01-04T01:30:30.312825",
- "cpu_percent": 49.54,
- "ram_percent": 47.9,
- "ram_used_gb": 30.49838638305664,
- "gpu_memory_used": 1248.0,
- "relative_time": 3.42720627784729
- },
- {
- "timestamp": "2025-01-04T01:30:31.421201",
- "cpu_percent": 47.16,
- "ram_percent": 47.9,
- "ram_used_gb": 30.44550323486328,
- "gpu_memory_used": 1251.0,
- "relative_time": 4.517812728881836
- },
- {
- "timestamp": "2025-01-04T01:30:32.514913",
- "cpu_percent": 47.98,
- "ram_percent": 47.8,
- "ram_used_gb": 30.41952896118164,
- "gpu_memory_used": 1251.0,
- "relative_time": 5.647390604019165
- },
- {
- "timestamp": "2025-01-04T01:30:33.649021",
- "cpu_percent": 48.55,
- "ram_percent": 47.8,
- "ram_used_gb": 30.400592803955078,
- "gpu_memory_used": 1249.0,
- "relative_time": 6.729969263076782
- },
- {
- "timestamp": "2025-01-04T01:30:34.723785",
- "cpu_percent": 43.88,
- "ram_percent": 47.8,
- "ram_used_gb": 30.390079498291016,
- "gpu_memory_used": 1253.0,
- "relative_time": 7.860571622848511
- },
- {
- "timestamp": "2025-01-04T01:30:35.864707",
- "cpu_percent": 50.01,
- "ram_percent": 47.8,
- "ram_used_gb": 30.380477905273438,
- "gpu_memory_used": 1253.0,
- "relative_time": 8.9869704246521
- },
- {
- "timestamp": "2025-01-04T01:30:36.982950",
- "cpu_percent": 49.29,
- "ram_percent": 47.8,
- "ram_used_gb": 30.41130828857422,
- "gpu_memory_used": 1255.0,
- "relative_time": 10.097310066223145
- },
- {
- "timestamp": "2025-01-04T01:30:38.099505",
- "cpu_percent": 52.99,
- "ram_percent": 47.8,
- "ram_used_gb": 30.410892486572266,
- "gpu_memory_used": 1252.0,
- "relative_time": 11.204046249389648
- },
- {
- "timestamp": "2025-01-04T01:30:39.205066",
- "cpu_percent": 42.98,
- "ram_percent": 47.8,
- "ram_used_gb": 30.40534210205078,
- "gpu_memory_used": 1253.0,
- "relative_time": 12.306914329528809
- },
- {
- "timestamp": "2025-01-04T01:30:40.305591",
- "cpu_percent": 47.11,
- "ram_percent": 47.8,
- "ram_used_gb": 30.40200424194336,
- "gpu_memory_used": 1253.0,
- "relative_time": 13.411193370819092
- },
- {
- "timestamp": "2025-01-04T01:30:41.410928",
- "cpu_percent": 50.09,
- "ram_percent": 47.8,
- "ram_used_gb": 30.39764404296875,
- "gpu_memory_used": 1260.0,
- "relative_time": 14.534100770950317
- },
- {
- "timestamp": "2025-01-04T01:30:42.530654",
- "cpu_percent": 57.82,
- "ram_percent": 47.8,
- "ram_used_gb": 30.39893341064453,
- "gpu_memory_used": 1256.0,
- "relative_time": 15.66111135482788
- },
- {
- "timestamp": "2025-01-04T01:30:43.666031",
- "cpu_percent": 52.61,
- "ram_percent": 47.8,
- "ram_used_gb": 30.37706756591797,
- "gpu_memory_used": 1256.0,
- "relative_time": 16.79327368736267
- },
- {
- "timestamp": "2025-01-04T01:30:44.794904",
- "cpu_percent": 57.14,
- "ram_percent": 47.7,
- "ram_used_gb": 30.36868667602539,
- "gpu_memory_used": 1256.0,
- "relative_time": 17.861677646636963
- },
- {
- "timestamp": "2025-01-04T01:30:45.865891",
- "cpu_percent": 66.7,
- "ram_percent": 47.7,
- "ram_used_gb": 30.371902465820312,
- "gpu_memory_used": 1257.0,
- "relative_time": 18.96451497077942
- },
- {
- "timestamp": "2025-01-04T01:30:46.971206",
- "cpu_percent": 53.61,
- "ram_percent": 47.7,
- "ram_used_gb": 30.352508544921875,
- "gpu_memory_used": 1254.0,
- "relative_time": 20.086195945739746
- },
- {
- "timestamp": "2025-01-04T01:30:48.089632",
- "cpu_percent": 50.26,
- "ram_percent": 47.7,
- "ram_used_gb": 30.349388122558594,
- "gpu_memory_used": 1248.0,
- "relative_time": 21.199003219604492
- },
- {
- "timestamp": "2025-01-04T01:30:49.191842",
- "cpu_percent": 48.22,
- "ram_percent": 47.7,
- "ram_used_gb": 30.344642639160156,
- "gpu_memory_used": 1251.0,
- "relative_time": 22.322958946228027
- },
- {
- "timestamp": "2025-01-04T01:30:50.324994",
- "cpu_percent": 55.64,
- "ram_percent": 47.7,
- "ram_used_gb": 30.35323715209961,
- "gpu_memory_used": 1251.0,
- "relative_time": 23.469967365264893
- },
- {
- "timestamp": "2025-01-04T01:30:51.477231",
- "cpu_percent": 45.68,
- "ram_percent": 47.7,
- "ram_used_gb": 30.35232162475586,
- "gpu_memory_used": 1251.0,
- "relative_time": 24.579415798187256
- },
- {
- "timestamp": "2025-01-04T01:30:52.585934",
- "cpu_percent": 46.07,
- "ram_percent": 47.7,
- "ram_used_gb": 30.32147216796875,
- "gpu_memory_used": 1244.0,
- "relative_time": 25.71301007270813
- },
- {
- "timestamp": "2025-01-04T01:30:53.707821",
- "cpu_percent": 47.54,
- "ram_percent": 47.6,
- "ram_used_gb": 30.296611785888672,
- "gpu_memory_used": 1244.0,
- "relative_time": 26.7750301361084
- },
- {
- "timestamp": "2025-01-04T01:30:54.766880",
- "cpu_percent": 44.8,
- "ram_percent": 47.6,
- "ram_used_gb": 30.28769302368164,
- "gpu_memory_used": 1237.0,
- "relative_time": 27.87526297569275
- },
- {
- "timestamp": "2025-01-04T01:30:55.873403",
- "cpu_percent": 48.82,
- "ram_percent": 47.6,
- "ram_used_gb": 30.285594940185547,
- "gpu_memory_used": 1237.0,
- "relative_time": 29.00292205810547
- },
- {
- "timestamp": "2025-01-04T01:30:57.003386",
- "cpu_percent": 55.54,
- "ram_percent": 47.6,
- "ram_used_gb": 30.30721664428711,
- "gpu_memory_used": 1237.0,
- "relative_time": 30.13248038291931
- },
- {
- "timestamp": "2025-01-04T01:30:58.135723",
- "cpu_percent": 46.97,
- "ram_percent": 47.7,
- "ram_used_gb": 30.319698333740234,
- "gpu_memory_used": 1237.0,
- "relative_time": 31.280652046203613
- },
- {
- "timestamp": "2025-01-04T01:30:59.274397",
- "cpu_percent": 46.94,
- "ram_percent": 47.7,
- "ram_used_gb": 30.31420135498047,
- "gpu_memory_used": 1239.0,
- "relative_time": 32.39983797073364
- },
- {
- "timestamp": "2025-01-04T01:31:00.405545",
- "cpu_percent": 53.81,
- "ram_percent": 47.7,
- "ram_used_gb": 30.335922241210938,
- "gpu_memory_used": 1243.0,
- "relative_time": 33.502938985824585
- },
- {
- "timestamp": "2025-01-04T01:31:01.497496",
- "cpu_percent": 51.0,
- "ram_percent": 47.7,
- "ram_used_gb": 30.325199127197266,
- "gpu_memory_used": 1243.0,
- "relative_time": 34.584938526153564
- },
- {
- "timestamp": "2025-01-04T01:31:02.583134",
- "cpu_percent": 49.26,
- "ram_percent": 47.6,
- "ram_used_gb": 30.30097198486328,
- "gpu_memory_used": 1243.0,
- "relative_time": 35.680947065353394
- },
- {
- "timestamp": "2025-01-04T01:31:03.686381",
- "cpu_percent": 48.91,
- "ram_percent": 47.6,
- "ram_used_gb": 30.300418853759766,
- "gpu_memory_used": 1243.0,
- "relative_time": 36.786722898483276
- },
- {
- "timestamp": "2025-01-04T01:31:04.786497",
- "cpu_percent": 48.69,
- "ram_percent": 47.6,
- "ram_used_gb": 30.29620361328125,
- "gpu_memory_used": 1243.0,
- "relative_time": 37.90794491767883
- },
- {
- "timestamp": "2025-01-04T01:31:05.908563",
- "cpu_percent": 50.43,
- "ram_percent": 47.6,
- "ram_used_gb": 30.29269027709961,
- "gpu_memory_used": 1243.0,
- "relative_time": 39.01517176628113
- },
- {
- "timestamp": "2025-01-04T01:31:07.014496",
- "cpu_percent": 48.22,
- "ram_percent": 47.6,
- "ram_used_gb": 30.298015594482422,
- "gpu_memory_used": 1243.0,
- "relative_time": 40.118446826934814
- },
- {
- "timestamp": "2025-01-04T01:31:08.120066",
- "cpu_percent": 47.47,
- "ram_percent": 47.7,
- "ram_used_gb": 30.312705993652344,
- "gpu_memory_used": 1243.0,
- "relative_time": 41.22802424430847
- },
- {
- "timestamp": "2025-01-04T01:31:09.225367",
- "cpu_percent": 41.09,
- "ram_percent": 47.7,
- "ram_used_gb": 30.34886932373047,
- "gpu_memory_used": 1244.0,
- "relative_time": 42.34174656867981
- },
- {
- "timestamp": "2025-01-04T01:31:10.339308",
- "cpu_percent": 44.12,
- "ram_percent": 47.7,
- "ram_used_gb": 30.353790283203125,
- "gpu_memory_used": 1245.0,
- "relative_time": 43.44456744194031
- },
- {
- "timestamp": "2025-01-04T01:31:11.443944",
- "cpu_percent": 48.99,
- "ram_percent": 47.7,
- "ram_used_gb": 30.34658432006836,
- "gpu_memory_used": 1245.0,
- "relative_time": 44.53658318519592
- },
- {
- "timestamp": "2025-01-04T01:31:12.533026",
- "cpu_percent": 47.62,
- "ram_percent": 47.7,
- "ram_used_gb": 30.318241119384766,
- "gpu_memory_used": 1245.0,
- "relative_time": 45.6171441078186
- },
- {
- "timestamp": "2025-01-04T01:31:13.617044",
- "cpu_percent": 49.3,
- "ram_percent": 47.7,
- "ram_used_gb": 30.318588256835938,
- "gpu_memory_used": 1245.0,
- "relative_time": 46.71653604507446
- },
- {
- "timestamp": "2025-01-04T01:31:14.718976",
- "cpu_percent": 48.42,
- "ram_percent": 47.7,
- "ram_used_gb": 30.316349029541016,
- "gpu_memory_used": 1239.0,
- "relative_time": 47.80844783782959
- },
- {
- "timestamp": "2025-01-04T01:31:15.805079",
- "cpu_percent": 47.56,
- "ram_percent": 47.6,
- "ram_used_gb": 30.30520248413086,
- "gpu_memory_used": 1239.0,
- "relative_time": 48.90499949455261
- },
- {
- "timestamp": "2025-01-04T01:31:16.902878",
- "cpu_percent": 49.11,
- "ram_percent": 47.6,
- "ram_used_gb": 30.306812286376953,
- "gpu_memory_used": 1232.0,
- "relative_time": 50.034260749816895
- },
- {
- "timestamp": "2025-01-04T01:31:18.035723",
- "cpu_percent": 45.81,
- "ram_percent": 47.7,
- "ram_used_gb": 30.32524871826172,
- "gpu_memory_used": 1237.0,
- "relative_time": 51.1371693611145
- },
- {
- "timestamp": "2025-01-04T01:31:19.143169",
- "cpu_percent": 49.94,
- "ram_percent": 47.7,
- "ram_used_gb": 30.323795318603516,
- "gpu_memory_used": 1237.0,
- "relative_time": 52.227344274520874
- },
- {
- "timestamp": "2025-01-04T01:31:20.230256",
- "cpu_percent": 39.57,
- "ram_percent": 47.7,
- "ram_used_gb": 30.330493927001953,
- "gpu_memory_used": 1237.0,
- "relative_time": 53.34033155441284
- },
- {
- "timestamp": "2025-01-04T01:31:21.331797",
- "cpu_percent": 44.34,
- "ram_percent": 47.7,
- "ram_used_gb": 30.330425262451172,
- "gpu_memory_used": 1237.0,
- "relative_time": 54.45246958732605
- },
- {
- "timestamp": "2025-01-04T01:31:22.450663",
- "cpu_percent": 46.87,
- "ram_percent": 47.6,
- "ram_used_gb": 30.3084716796875,
- "gpu_memory_used": 1237.0,
- "relative_time": 55.55728077888489
- },
- {
- "timestamp": "2025-01-04T01:31:23.550691",
- "cpu_percent": 49.88,
- "ram_percent": 47.6,
- "ram_used_gb": 30.309173583984375,
- "gpu_memory_used": 1243.0,
- "relative_time": 56.65515089035034
- },
- {
- "timestamp": "2025-01-04T01:31:24.650939",
- "cpu_percent": 51.21,
- "ram_percent": 47.6,
- "ram_used_gb": 30.30620574951172,
- "gpu_memory_used": 1243.0,
- "relative_time": 57.726617097854614
- },
- {
- "timestamp": "2025-01-04T01:31:25.728955",
- "cpu_percent": 45.22,
- "ram_percent": 47.6,
- "ram_used_gb": 30.291912078857422,
- "gpu_memory_used": 1243.0,
- "relative_time": 58.82792663574219
- },
- {
- "timestamp": "2025-01-04T01:31:26.829490",
- "cpu_percent": 48.86,
- "ram_percent": 47.6,
- "ram_used_gb": 30.289695739746094,
- "gpu_memory_used": 1243.0,
- "relative_time": 59.93786025047302
- },
- {
- "timestamp": "2025-01-04T01:31:27.937071",
- "cpu_percent": 45.69,
- "ram_percent": 47.6,
- "ram_used_gb": 30.302818298339844,
- "gpu_memory_used": 1243.0,
- "relative_time": 61.05047869682312
- },
- {
- "timestamp": "2025-01-04T01:31:29.044046",
- "cpu_percent": 51.09,
- "ram_percent": 47.6,
- "ram_used_gb": 30.307464599609375,
- "gpu_memory_used": 1243.0,
- "relative_time": 62.159112215042114
- },
- {
- "timestamp": "2025-01-04T01:31:30.162426",
- "cpu_percent": 47.04,
- "ram_percent": 47.7,
- "ram_used_gb": 30.32668685913086,
- "gpu_memory_used": 1243.0,
- "relative_time": 63.249592542648315
- },
- {
- "timestamp": "2025-01-04T01:31:31.251755",
- "cpu_percent": 45.32,
- "ram_percent": 47.7,
- "ram_used_gb": 30.330463409423828,
- "gpu_memory_used": 1243.0,
- "relative_time": 64.35896062850952
- },
- {
- "timestamp": "2025-01-04T01:31:32.362284",
- "cpu_percent": 47.2,
- "ram_percent": 47.7,
- "ram_used_gb": 30.314319610595703,
- "gpu_memory_used": 1239.0,
- "relative_time": 65.4672338962555
- },
- {
- "timestamp": "2025-01-04T01:31:33.468921",
- "cpu_percent": 48.94,
- "ram_percent": 47.6,
- "ram_used_gb": 30.308246612548828,
- "gpu_memory_used": 1243.0,
- "relative_time": 66.5955581665039
- },
- {
- "timestamp": "2025-01-04T01:31:34.594176",
- "cpu_percent": 47.88,
- "ram_percent": 47.6,
- "ram_used_gb": 30.29806137084961,
- "gpu_memory_used": 1243.0,
- "relative_time": 67.68029594421387
- },
- {
- "timestamp": "2025-01-04T01:31:35.682260",
- "cpu_percent": 45.92,
- "ram_percent": 47.6,
- "ram_used_gb": 30.299114227294922,
- "gpu_memory_used": 1243.0,
- "relative_time": 68.7970290184021
- },
- {
- "timestamp": "2025-01-04T01:31:36.802433",
- "cpu_percent": 51.07,
- "ram_percent": 47.6,
- "ram_used_gb": 30.29195785522461,
- "gpu_memory_used": 1243.0,
- "relative_time": 69.92168736457825
- },
- {
- "timestamp": "2025-01-04T01:31:37.926464",
- "cpu_percent": 47.29,
- "ram_percent": 47.7,
- "ram_used_gb": 30.324363708496094,
- "gpu_memory_used": 1243.0,
- "relative_time": 71.05467820167542
- },
- {
- "timestamp": "2025-01-04T01:31:39.059936",
- "cpu_percent": 48.91,
- "ram_percent": 47.7,
- "ram_used_gb": 30.32428741455078,
- "gpu_memory_used": 1243.0,
- "relative_time": 72.14405465126038
- },
- {
- "timestamp": "2025-01-04T01:31:40.142859",
- "cpu_percent": 44.66,
- "ram_percent": 47.7,
- "ram_used_gb": 30.33354949951172,
- "gpu_memory_used": 1243.0,
- "relative_time": 73.25559496879578
- },
- {
- "timestamp": "2025-01-04T01:31:41.254868",
- "cpu_percent": 48.98,
- "ram_percent": 47.7,
- "ram_used_gb": 30.344337463378906,
- "gpu_memory_used": 1237.0,
- "relative_time": 74.35676956176758
- },
- {
- "timestamp": "2025-01-04T01:31:42.354977",
- "cpu_percent": 50.79,
- "ram_percent": 47.7,
- "ram_used_gb": 30.322650909423828,
- "gpu_memory_used": 1237.0,
- "relative_time": 75.43929266929626
- },
- {
- "timestamp": "2025-01-04T01:31:43.432869",
- "cpu_percent": 45.86,
- "ram_percent": 47.7,
- "ram_used_gb": 30.316268920898438,
- "gpu_memory_used": 1237.0,
- "relative_time": 76.53794598579407
- },
- {
- "timestamp": "2025-01-04T01:31:44.535917",
- "cpu_percent": 47.22,
- "ram_percent": 47.6,
- "ram_used_gb": 30.308757781982422,
- "gpu_memory_used": 1237.0,
- "relative_time": 77.6620762348175
- },
- {
- "timestamp": "2025-01-04T01:31:45.666281",
- "cpu_percent": 51.06,
- "ram_percent": 47.6,
- "ram_used_gb": 30.307342529296875,
- "gpu_memory_used": 1237.0,
- "relative_time": 78.77155900001526
- },
- {
- "timestamp": "2025-01-04T01:31:46.771605",
- "cpu_percent": 47.82,
- "ram_percent": 47.6,
- "ram_used_gb": 30.298141479492188,
- "gpu_memory_used": 1237.0,
- "relative_time": 79.87201809883118
- },
- {
- "timestamp": "2025-01-04T01:31:47.874817",
- "cpu_percent": 44.51,
- "ram_percent": 47.7,
- "ram_used_gb": 30.322750091552734,
- "gpu_memory_used": 1243.0,
- "relative_time": 80.97521829605103
- },
- {
- "timestamp": "2025-01-04T01:31:48.983338",
- "cpu_percent": 47.69,
- "ram_percent": 47.7,
- "ram_used_gb": 30.3226318359375,
- "gpu_memory_used": 1243.0,
- "relative_time": 82.09707593917847
- },
- {
- "timestamp": "2025-01-04T01:31:50.102541",
- "cpu_percent": 42.36,
- "ram_percent": 47.7,
- "ram_used_gb": 30.32965087890625,
- "gpu_memory_used": 1243.0,
- "relative_time": 83.20944809913635
- },
- {
- "timestamp": "2025-01-04T01:31:51.204766",
- "cpu_percent": 45.87,
- "ram_percent": 47.7,
- "ram_used_gb": 30.32353973388672,
- "gpu_memory_used": 1243.0,
- "relative_time": 84.31531429290771
- },
- {
- "timestamp": "2025-01-04T01:31:52.310873",
- "cpu_percent": 50.01,
- "ram_percent": 47.6,
- "ram_used_gb": 30.296016693115234,
- "gpu_memory_used": 1247.0,
- "relative_time": 85.4254515171051
- },
- {
- "timestamp": "2025-01-04T01:31:53.429342",
- "cpu_percent": 49.65,
- "ram_percent": 47.6,
- "ram_used_gb": 30.306453704833984,
- "gpu_memory_used": 1246.0,
- "relative_time": 86.51991653442383
- },
- {
- "timestamp": "2025-01-04T01:31:54.517894",
- "cpu_percent": 47.29,
- "ram_percent": 47.6,
- "ram_used_gb": 30.30263900756836,
- "gpu_memory_used": 1245.0,
- "relative_time": 87.60364723205566
- },
- {
- "timestamp": "2025-01-04T01:31:55.602848",
- "cpu_percent": 47.48,
- "ram_percent": 47.6,
- "ram_used_gb": 30.303203582763672,
- "gpu_memory_used": 1245.0,
- "relative_time": 88.68531346321106
- },
- {
- "timestamp": "2025-01-04T01:31:56.677895",
- "cpu_percent": 46.74,
- "ram_percent": 47.6,
- "ram_used_gb": 30.29749298095703,
- "gpu_memory_used": 1245.0,
- "relative_time": 89.78639531135559
- },
- {
- "timestamp": "2025-01-04T01:31:57.794084",
- "cpu_percent": 43.92,
- "ram_percent": 47.7,
- "ram_used_gb": 30.313438415527344,
- "gpu_memory_used": 1244.0,
- "relative_time": 90.89922308921814
- },
- {
- "timestamp": "2025-01-04T01:31:58.901464",
- "cpu_percent": 48.88,
- "ram_percent": 47.7,
- "ram_used_gb": 30.32254409790039,
- "gpu_memory_used": 1244.0,
- "relative_time": 91.96823143959045
- },
- {
- "timestamp": "2025-01-04T01:31:59.972227",
- "cpu_percent": 38.89,
- "ram_percent": 47.7,
- "ram_used_gb": 30.32897186279297,
- "gpu_memory_used": 1245.0,
- "relative_time": 93.08689904212952
- },
- {
- "timestamp": "2025-01-04T01:32:01.089013",
- "cpu_percent": 49.22,
- "ram_percent": 47.7,
- "ram_used_gb": 30.328304290771484,
- "gpu_memory_used": 1250.0,
- "relative_time": 94.20951867103577
- },
- {
- "timestamp": "2025-01-04T01:32:02.202304",
- "cpu_percent": 46.56,
- "ram_percent": 47.6,
- "ram_used_gb": 30.29920196533203,
- "gpu_memory_used": 1250.0,
- "relative_time": 95.29210877418518
- },
- {
- "timestamp": "2025-01-04T01:32:03.292108",
- "cpu_percent": 46.39,
- "ram_percent": 47.6,
- "ram_used_gb": 30.308143615722656,
- "gpu_memory_used": 1250.0,
- "relative_time": 96.40629982948303
- },
- {
- "timestamp": "2025-01-04T01:32:04.402400",
- "cpu_percent": 49.88,
- "ram_percent": 47.6,
- "ram_used_gb": 30.310047149658203,
- "gpu_memory_used": 1250.0,
- "relative_time": 97.51973557472229
- },
- {
- "timestamp": "2025-01-04T01:32:05.513450",
- "cpu_percent": 53.28,
- "ram_percent": 47.6,
- "ram_used_gb": 30.30374526977539,
- "gpu_memory_used": 1249.0,
- "relative_time": 98.62612318992615
- },
- {
- "timestamp": "2025-01-04T01:32:06.631627",
- "cpu_percent": 44.65,
- "ram_percent": 47.6,
- "ram_used_gb": 30.30333709716797,
- "gpu_memory_used": 1242.0,
- "relative_time": 99.73457670211792
- },
- {
- "timestamp": "2025-01-04T01:32:07.736449",
- "cpu_percent": 50.93,
- "ram_percent": 47.7,
- "ram_used_gb": 30.33118438720703,
- "gpu_memory_used": 1242.0,
- "relative_time": 100.85807871818542
- },
- {
- "timestamp": "2025-01-04T01:32:08.860429",
- "cpu_percent": 62.71,
- "ram_percent": 47.8,
- "ram_used_gb": 30.41672134399414,
- "gpu_memory_used": 1244.0,
- "relative_time": 102.08941197395325
- },
- {
- "timestamp": "2025-01-04T01:32:10.080974",
- "cpu_percent": 96.29,
- "ram_percent": 47.9,
- "ram_used_gb": 30.45757293701172,
- "gpu_memory_used": 1245.0,
- "relative_time": 103.18154048919678
- },
- {
- "timestamp": "2025-01-04T01:32:11.187912",
- "cpu_percent": 49.09,
- "ram_percent": 47.9,
- "ram_used_gb": 30.445499420166016,
- "gpu_memory_used": 1245.0,
- "relative_time": 104.30198311805725
- },
- {
- "timestamp": "2025-01-04T01:32:12.306213",
- "cpu_percent": 51.15,
- "ram_percent": 47.8,
- "ram_used_gb": 30.42266845703125,
- "gpu_memory_used": 1240.0,
- "relative_time": 105.43745422363281
- },
- {
- "timestamp": "2025-01-04T01:32:13.437791",
- "cpu_percent": 47.79,
- "ram_percent": 47.8,
- "ram_used_gb": 30.40296173095703,
- "gpu_memory_used": 1236.0,
- "relative_time": 106.55609393119812
- },
- {
- "timestamp": "2025-01-04T01:32:14.548441",
- "cpu_percent": 39.41,
- "ram_percent": 47.8,
- "ram_used_gb": 30.406475067138672,
- "gpu_memory_used": 1244.0,
- "relative_time": 107.67082047462463
- },
- {
- "timestamp": "2025-01-04T01:32:15.666526",
- "cpu_percent": 77.07,
- "ram_percent": 47.8,
- "ram_used_gb": 30.424407958984375,
- "gpu_memory_used": 1247.0,
- "relative_time": 108.7851665019989
- },
- {
- "timestamp": "2025-01-04T01:32:16.780793",
- "cpu_percent": 49.13,
- "ram_percent": 47.8,
- "ram_used_gb": 30.429065704345703,
- "gpu_memory_used": 1246.0,
- "relative_time": 109.88107633590698
- },
- {
- "timestamp": "2025-01-04T01:32:17.879071",
- "cpu_percent": 82.96,
- "ram_percent": 47.8,
- "ram_used_gb": 30.428447723388672,
- "gpu_memory_used": 1281.0,
- "relative_time": 111.02328372001648
- },
- {
- "timestamp": "2025-01-04T01:32:19.026978",
- "cpu_percent": 74.64,
- "ram_percent": 47.8,
- "ram_used_gb": 30.430500030517578,
- "gpu_memory_used": 1281.0,
- "relative_time": 112.15347504615784
- },
- {
- "timestamp": "2025-01-04T01:32:20.156784",
- "cpu_percent": 76.94,
- "ram_percent": 47.8,
- "ram_used_gb": 30.40774917602539,
- "gpu_memory_used": 1274.0,
- "relative_time": 113.31317591667175
- },
- {
- "timestamp": "2025-01-04T01:32:21.310871",
- "cpu_percent": 69.52,
- "ram_percent": 47.8,
- "ram_used_gb": 30.429115295410156,
- "gpu_memory_used": 1273.0,
- "relative_time": 114.42301273345947
- },
- {
- "timestamp": "2025-01-04T01:32:22.424508",
- "cpu_percent": 74.47,
- "ram_percent": 47.8,
- "ram_used_gb": 30.403045654296875,
- "gpu_memory_used": 1274.0,
- "relative_time": 115.52539491653442
- },
- {
- "timestamp": "2025-01-04T01:32:23.525673",
- "cpu_percent": 67.2,
- "ram_percent": 47.9,
- "ram_used_gb": 30.484474182128906,
- "gpu_memory_used": 1273.0,
- "relative_time": 116.61319661140442
- },
- {
- "timestamp": "2025-01-04T01:32:24.613302",
- "cpu_percent": 57.41,
- "ram_percent": 47.7,
- "ram_used_gb": 30.35879135131836,
- "gpu_memory_used": 1270.0,
- "relative_time": 117.72619676589966
- },
- {
- "timestamp": "2025-01-04T01:32:25.730732",
- "cpu_percent": 45.97,
- "ram_percent": 47.7,
- "ram_used_gb": 30.347335815429688,
- "gpu_memory_used": 1270.0,
- "relative_time": 118.84320116043091
- },
- {
- "timestamp": "2025-01-04T01:32:26.845420",
- "cpu_percent": 47.74,
- "ram_percent": 47.7,
- "ram_used_gb": 30.354007720947266,
- "gpu_memory_used": 1265.0,
- "relative_time": 119.96074485778809
- },
- {
- "timestamp": "2025-01-04T01:32:27.964248",
- "cpu_percent": 60.0,
- "ram_percent": 47.7,
- "ram_used_gb": 30.3675537109375,
- "gpu_memory_used": 1258.0,
- "relative_time": 121.09439873695374
- },
- {
- "timestamp": "2025-01-04T01:32:29.094542",
- "cpu_percent": 54.46,
- "ram_percent": 47.7,
- "ram_used_gb": 30.367305755615234,
- "gpu_memory_used": 1230.0,
- "relative_time": 122.24102592468262
- },
- {
- "timestamp": "2025-01-04T01:32:30.244200",
- "cpu_percent": 56.21,
- "ram_percent": 47.7,
- "ram_used_gb": 30.364959716796875,
- "gpu_memory_used": 1230.0,
- "relative_time": 123.34450554847717
- },
- {
- "timestamp": "2025-01-04T01:32:31.346103",
- "cpu_percent": 40.66,
- "ram_percent": 47.8,
- "ram_used_gb": 30.420738220214844,
- "gpu_memory_used": 1235.0,
- "relative_time": 124.46777892112732
- },
- {
- "timestamp": "2025-01-04T01:32:32.463710",
- "cpu_percent": 51.66,
- "ram_percent": 47.8,
- "ram_used_gb": 30.396198272705078,
- "gpu_memory_used": 1235.0,
- "relative_time": 125.57916116714478
- },
- {
- "timestamp": "2025-01-04T01:32:33.580811",
- "cpu_percent": 49.68,
- "ram_percent": 47.8,
- "ram_used_gb": 30.40151596069336,
- "gpu_memory_used": 1236.0,
- "relative_time": 126.6768786907196
- },
- {
- "timestamp": "2025-01-04T01:32:34.668960",
- "cpu_percent": 49.09,
- "ram_percent": 47.8,
- "ram_used_gb": 30.380916595458984,
- "gpu_memory_used": 1236.0,
- "relative_time": 127.73568296432495
- },
- {
- "timestamp": "2025-01-04T01:32:35.729484",
- "cpu_percent": 48.53,
- "ram_percent": 47.8,
- "ram_used_gb": 30.385761260986328,
- "gpu_memory_used": 1236.0,
- "relative_time": 128.85891699790955
- },
- {
- "timestamp": "2025-01-04T01:32:36.849812",
- "cpu_percent": 52.39,
- "ram_percent": 47.8,
- "ram_used_gb": 30.414752960205078,
- "gpu_memory_used": 1235.0,
- "relative_time": 129.9150390625
- },
- {
- "timestamp": "2025-01-04T01:32:37.919974",
- "cpu_percent": 46.89,
- "ram_percent": 47.9,
- "ram_used_gb": 30.450218200683594,
- "gpu_memory_used": 1235.0,
- "relative_time": 131.00502228736877
- },
- {
- "timestamp": "2025-01-04T01:32:39.008115",
- "cpu_percent": 46.59,
- "ram_percent": 47.8,
- "ram_used_gb": 30.436458587646484,
- "gpu_memory_used": 1235.0,
- "relative_time": 132.10191130638123
- },
- {
- "timestamp": "2025-01-04T01:32:40.095463",
- "cpu_percent": 45.76,
- "ram_percent": 47.9,
- "ram_used_gb": 30.443893432617188,
- "gpu_memory_used": 1224.0,
- "relative_time": 133.26839780807495
- },
- {
- "timestamp": "2025-01-04T01:32:41.265737",
- "cpu_percent": 56.94,
- "ram_percent": 47.8,
- "ram_used_gb": 30.41216278076172,
- "gpu_memory_used": 1224.0,
- "relative_time": 134.32926607131958
- },
- {
- "timestamp": "2025-01-04T01:32:42.321015",
- "cpu_percent": 40.36,
- "ram_percent": 47.8,
- "ram_used_gb": 30.386669158935547,
- "gpu_memory_used": 1224.0,
- "relative_time": 135.40537309646606
- },
- {
- "timestamp": "2025-01-04T01:32:43.400382",
- "cpu_percent": 44.51,
- "ram_percent": 47.8,
- "ram_used_gb": 30.39049530029297,
- "gpu_memory_used": 1224.0,
- "relative_time": 136.52469301223755
- },
- {
- "timestamp": "2025-01-04T01:32:44.524119",
- "cpu_percent": 50.29,
- "ram_percent": 47.8,
- "ram_used_gb": 30.38903045654297,
- "gpu_memory_used": 1220.0,
- "relative_time": 137.60522270202637
- },
- {
- "timestamp": "2025-01-04T01:32:45.599869",
- "cpu_percent": 51.69,
- "ram_percent": 47.8,
- "ram_used_gb": 30.378681182861328,
- "gpu_memory_used": 1213.0,
- "relative_time": 138.7130560874939
- },
- {
- "timestamp": "2025-01-04T01:32:46.711674",
- "cpu_percent": 49.55,
- "ram_percent": 47.7,
- "ram_used_gb": 30.34076690673828,
- "gpu_memory_used": 1213.0,
- "relative_time": 139.8105547428131
- },
- {
- "timestamp": "2025-01-04T01:32:47.813091",
- "cpu_percent": 44.5,
- "ram_percent": 47.7,
- "ram_used_gb": 30.343746185302734,
- "gpu_memory_used": 1213.0,
- "relative_time": 140.91643166542053
- },
- {
- "timestamp": "2025-01-04T01:32:48.917679",
- "cpu_percent": 43.76,
- "ram_percent": 47.7,
- "ram_used_gb": 30.354793548583984,
- "gpu_memory_used": 1213.0,
- "relative_time": 142.04264283180237
- },
- {
- "timestamp": "2025-01-04T01:32:50.047653",
- "cpu_percent": 48.41,
- "ram_percent": 47.7,
- "ram_used_gb": 30.361080169677734,
- "gpu_memory_used": 1219.0,
- "relative_time": 143.14667677879333
- },
- {
- "timestamp": "2025-01-04T01:32:51.153490",
- "cpu_percent": 57.01,
- "ram_percent": 47.9,
- "ram_used_gb": 30.4390869140625,
- "gpu_memory_used": 1232.0,
- "relative_time": 144.2709481716156
- },
- {
- "timestamp": "2025-01-04T01:32:52.272196",
- "cpu_percent": 54.69,
- "ram_percent": 47.9,
- "ram_used_gb": 30.46664047241211,
- "gpu_memory_used": 1236.0,
- "relative_time": 145.36933588981628
- },
- {
- "timestamp": "2025-01-04T01:32:53.374563",
- "cpu_percent": 51.37,
- "ram_percent": 47.9,
- "ram_used_gb": 30.487388610839844,
- "gpu_memory_used": 1245.0,
- "relative_time": 146.4400930404663
- },
- {
- "timestamp": "2025-01-04T01:32:54.445178",
- "cpu_percent": 47.76,
- "ram_percent": 47.9,
- "ram_used_gb": 30.475635528564453,
- "gpu_memory_used": 1241.0,
- "relative_time": 147.5295627117157
- },
- {
- "timestamp": "2025-01-04T01:32:55.520495",
- "cpu_percent": 49.24,
- "ram_percent": 47.9,
- "ram_used_gb": 30.47634506225586,
- "gpu_memory_used": 1236.0,
- "relative_time": 148.5926468372345
- },
- {
- "timestamp": "2025-01-04T01:32:56.591995",
- "cpu_percent": 53.63,
- "ram_percent": 47.9,
- "ram_used_gb": 30.49687957763672,
- "gpu_memory_used": 1251.0,
- "relative_time": 149.72928547859192
- },
- {
- "timestamp": "2025-01-04T01:32:57.727346",
- "cpu_percent": 65.04,
- "ram_percent": 48.1,
- "ram_used_gb": 30.59111785888672,
- "gpu_memory_used": 1251.0,
- "relative_time": 150.86237502098083
- },
- {
- "timestamp": "2025-01-04T01:32:58.862812",
- "cpu_percent": 71.05,
- "ram_percent": 48.2,
- "ram_used_gb": 30.633731842041016,
- "gpu_memory_used": 1263.0,
- "relative_time": 152.03348207473755
- },
- {
- "timestamp": "2025-01-04T01:33:00.037915",
- "cpu_percent": 85.87,
- "ram_percent": 48.2,
- "ram_used_gb": 30.68001937866211,
- "gpu_memory_used": 1253.0,
- "relative_time": 153.1551034450531
- },
- {
- "timestamp": "2025-01-04T01:33:01.158119",
- "cpu_percent": 59.8,
- "ram_percent": 48.2,
- "ram_used_gb": 30.69198989868164,
- "gpu_memory_used": 1252.0,
- "relative_time": 154.2606840133667
- },
- {
- "timestamp": "2025-01-04T01:33:02.262390",
- "cpu_percent": 45.33,
- "ram_percent": 48.3,
- "ram_used_gb": 30.743839263916016,
- "gpu_memory_used": 1252.0,
- "relative_time": 155.3663365840912
- },
- {
- "timestamp": "2025-01-04T01:33:03.369936",
- "cpu_percent": 35.41,
- "ram_percent": 48.2,
- "ram_used_gb": 30.68472671508789,
- "gpu_memory_used": 1252.0,
- "relative_time": 156.4842345714569
- },
- {
- "timestamp": "2025-01-04T01:33:04.488089",
- "cpu_percent": 47.22,
- "ram_percent": 48.4,
- "ram_used_gb": 30.78485870361328,
- "gpu_memory_used": 1254.0,
- "relative_time": 157.58868670463562
- },
- {
- "timestamp": "2025-01-04T01:33:05.592303",
- "cpu_percent": 36.14,
- "ram_percent": 48.5,
- "ram_used_gb": 30.87320327758789,
- "gpu_memory_used": 1254.0,
- "relative_time": 158.71629786491394
- },
- {
- "timestamp": "2025-01-04T01:33:06.721317",
- "cpu_percent": 38.46,
- "ram_percent": 48.2,
- "ram_used_gb": 30.668170928955078,
- "gpu_memory_used": 1254.0,
- "relative_time": 159.82655477523804
- },
- {
- "timestamp": "2025-01-04T01:33:07.827187",
- "cpu_percent": 35.81,
- "ram_percent": 48.4,
- "ram_used_gb": 30.777912139892578,
- "gpu_memory_used": 1254.0,
- "relative_time": 160.94229197502136
- },
- {
- "timestamp": "2025-01-04T01:33:08.943035",
- "cpu_percent": 39.24,
- "ram_percent": 48.5,
- "ram_used_gb": 30.86941146850586,
- "gpu_memory_used": 1254.0,
- "relative_time": 162.06378889083862
- },
- {
- "timestamp": "2025-01-04T01:33:10.063208",
- "cpu_percent": 51.52,
- "ram_percent": 48.1,
- "ram_used_gb": 30.624229431152344,
- "gpu_memory_used": 1254.0,
- "relative_time": 163.16198420524597
- },
- {
- "timestamp": "2025-01-04T01:33:11.163067",
- "cpu_percent": 48.99,
- "ram_percent": 48.1,
- "ram_used_gb": 30.612281799316406,
- "gpu_memory_used": 1254.0,
- "relative_time": 164.26579809188843
- },
- {
- "timestamp": "2025-01-04T01:33:12.266417",
- "cpu_percent": 46.27,
- "ram_percent": 48.1,
- "ram_used_gb": 30.584861755371094,
- "gpu_memory_used": 1252.0,
- "relative_time": 165.35981583595276
- },
- {
- "timestamp": "2025-01-04T01:33:13.354673",
- "cpu_percent": 45.71,
- "ram_percent": 48.1,
- "ram_used_gb": 30.582279205322266,
- "gpu_memory_used": 1252.0,
- "relative_time": 166.45263361930847
- },
- {
- "timestamp": "2025-01-04T01:33:14.447308",
- "cpu_percent": 48.69,
- "ram_percent": 48.1,
- "ram_used_gb": 30.584793090820312,
- "gpu_memory_used": 1252.0,
- "relative_time": 167.54857754707336
- },
- {
- "timestamp": "2025-01-04T01:33:15.552042",
- "cpu_percent": 48.66,
- "ram_percent": 48.1,
- "ram_used_gb": 30.580883026123047,
- "gpu_memory_used": 1252.0,
- "relative_time": 168.659592628479
- },
- {
- "timestamp": "2025-01-04T01:33:16.653015",
- "cpu_percent": 50.37,
- "ram_percent": 48.1,
- "ram_used_gb": 30.573726654052734,
- "gpu_memory_used": 1252.0,
- "relative_time": 169.7969992160797
- },
- {
- "timestamp": "2025-01-04T01:33:17.802854",
- "cpu_percent": 49.45,
- "ram_percent": 48.1,
- "ram_used_gb": 30.587318420410156,
- "gpu_memory_used": 1252.0,
- "relative_time": 170.891606092453
- },
- {
- "timestamp": "2025-01-04T01:33:18.893192",
- "cpu_percent": 50.16,
- "ram_percent": 48.1,
- "ram_used_gb": 30.5953369140625,
- "gpu_memory_used": 1252.0,
- "relative_time": 172.0133557319641
- },
- {
- "timestamp": "2025-01-04T01:33:20.008593",
- "cpu_percent": 47.57,
- "ram_percent": 48.1,
- "ram_used_gb": 30.6124267578125,
- "gpu_memory_used": 1252.0,
- "relative_time": 173.0913679599762
- },
- {
- "timestamp": "2025-01-04T01:33:21.097576",
- "cpu_percent": 44.32,
- "ram_percent": 48.1,
- "ram_used_gb": 30.584686279296875,
- "gpu_memory_used": 1252.0,
- "relative_time": 174.20030999183655
- },
- {
- "timestamp": "2025-01-04T01:33:22.201335",
- "cpu_percent": 49.01,
- "ram_percent": 48.0,
- "ram_used_gb": 30.547630310058594,
- "gpu_memory_used": 1252.0,
- "relative_time": 175.30235862731934
- },
- {
- "timestamp": "2025-01-04T01:33:23.306131",
- "cpu_percent": 43.7,
- "ram_percent": 48.0,
- "ram_used_gb": 30.559757232666016,
- "gpu_memory_used": 1251.0,
- "relative_time": 176.40550017356873
- },
- {
- "timestamp": "2025-01-04T01:33:24.408896",
- "cpu_percent": 48.77,
- "ram_percent": 48.0,
- "ram_used_gb": 30.5601806640625,
- "gpu_memory_used": 1251.0,
- "relative_time": 177.4984576702118
- },
- {
- "timestamp": "2025-01-04T01:33:25.496705",
- "cpu_percent": 50.56,
- "ram_percent": 48.0,
- "ram_used_gb": 30.556926727294922,
- "gpu_memory_used": 1251.0,
- "relative_time": 178.58782863616943
- },
- {
- "timestamp": "2025-01-04T01:33:26.588438",
- "cpu_percent": 47.76,
- "ram_percent": 48.0,
- "ram_used_gb": 30.53600311279297,
- "gpu_memory_used": 1251.0,
- "relative_time": 179.67969870567322
- },
- {
- "timestamp": "2025-01-04T01:33:27.679807",
- "cpu_percent": 49.0,
- "ram_percent": 48.0,
- "ram_used_gb": 30.540546417236328,
- "gpu_memory_used": 1251.0,
- "relative_time": 180.78388810157776
- },
- {
- "timestamp": "2025-01-04T01:33:28.780263",
- "cpu_percent": 49.25,
- "ram_percent": 48.0,
- "ram_used_gb": 30.55233383178711,
- "gpu_memory_used": 1251.0,
- "relative_time": 181.88185930252075
- },
- {
- "timestamp": "2025-01-04T01:33:29.881869",
- "cpu_percent": 47.08,
- "ram_percent": 48.1,
- "ram_used_gb": 30.56603240966797,
- "gpu_memory_used": 1251.0,
- "relative_time": 182.9666450023651
- },
- {
- "timestamp": "2025-01-04T01:33:30.957821",
- "cpu_percent": 45.77,
- "ram_percent": 48.0,
- "ram_used_gb": 30.559410095214844,
- "gpu_memory_used": 1251.0,
- "relative_time": 184.05338644981384
- },
- {
- "timestamp": "2025-01-04T01:33:32.047377",
- "cpu_percent": 50.79,
- "ram_percent": 48.0,
- "ram_used_gb": 30.534175872802734,
- "gpu_memory_used": 1251.0,
- "relative_time": 185.17484974861145
- },
- {
- "timestamp": "2025-01-04T01:33:33.167413",
- "cpu_percent": 52.13,
- "ram_percent": 48.0,
- "ram_used_gb": 30.54046630859375,
- "gpu_memory_used": 1266.0,
- "relative_time": 186.23550605773926
- },
- {
- "timestamp": "2025-01-04T01:33:34.226743",
- "cpu_percent": 43.81,
- "ram_percent": 48.0,
- "ram_used_gb": 30.54621124267578,
- "gpu_memory_used": 1266.0,
- "relative_time": 187.30887961387634
- },
- {
- "timestamp": "2025-01-04T01:33:35.303398",
- "cpu_percent": 49.28,
- "ram_percent": 48.0,
- "ram_used_gb": 30.545230865478516,
- "gpu_memory_used": 1264.0,
- "relative_time": 188.40410709381104
- },
- {
- "timestamp": "2025-01-04T01:33:36.405660",
- "cpu_percent": 46.44,
- "ram_percent": 48.0,
- "ram_used_gb": 30.540679931640625,
- "gpu_memory_used": 1264.0,
- "relative_time": 189.47515082359314
- },
- {
- "timestamp": "2025-01-04T01:33:37.469955",
- "cpu_percent": 41.6,
- "ram_percent": 48.0,
- "ram_used_gb": 30.562320709228516,
- "gpu_memory_used": 1264.0,
- "relative_time": 190.56309294700623
- },
- {
- "timestamp": "2025-01-04T01:33:38.556728",
- "cpu_percent": 50.52,
- "ram_percent": 48.0,
- "ram_used_gb": 30.561084747314453,
- "gpu_memory_used": 1264.0,
- "relative_time": 191.66572499275208
- },
- {
- "timestamp": "2025-01-04T01:33:39.665385",
- "cpu_percent": 40.93,
- "ram_percent": 48.1,
- "ram_used_gb": 30.577682495117188,
- "gpu_memory_used": 1264.0,
- "relative_time": 192.76011109352112
- },
- {
- "timestamp": "2025-01-04T01:33:40.754482",
- "cpu_percent": 50.46,
- "ram_percent": 48.1,
- "ram_used_gb": 30.5740966796875,
- "gpu_memory_used": 1262.0,
- "relative_time": 193.90924453735352
- },
- {
- "timestamp": "2025-01-04T01:33:41.903437",
- "cpu_percent": 52.75,
- "ram_percent": 48.1,
- "ram_used_gb": 30.58869171142578,
- "gpu_memory_used": 1258.0,
- "relative_time": 195.0148274898529
- },
- {
- "timestamp": "2025-01-04T01:33:43.008520",
- "cpu_percent": 50.04,
- "ram_percent": 48.0,
- "ram_used_gb": 30.560386657714844,
- "gpu_memory_used": 1258.0,
- "relative_time": 196.12349009513855
- },
- {
- "timestamp": "2025-01-04T01:33:44.129194",
- "cpu_percent": 51.56,
- "ram_percent": 48.1,
- "ram_used_gb": 30.572277069091797,
- "gpu_memory_used": 1258.0,
- "relative_time": 197.20997285842896
- },
- {
- "timestamp": "2025-01-04T01:33:45.212927",
- "cpu_percent": 47.77,
- "ram_percent": 48.0,
- "ram_used_gb": 30.556873321533203,
- "gpu_memory_used": 1258.0,
- "relative_time": 198.29724264144897
- },
- {
- "timestamp": "2025-01-04T01:33:46.288883",
- "cpu_percent": 46.07,
- "ram_percent": 48.0,
- "ram_used_gb": 30.554439544677734,
- "gpu_memory_used": 1258.0,
- "relative_time": 199.39549779891968
- },
- {
- "timestamp": "2025-01-04T01:33:47.403171",
- "cpu_percent": 46.18,
- "ram_percent": 48.0,
- "ram_used_gb": 30.557025909423828,
- "gpu_memory_used": 1258.0,
- "relative_time": 200.50221276283264
- },
- {
- "timestamp": "2025-01-04T01:33:48.495515",
- "cpu_percent": 48.09,
- "ram_percent": 48.0,
- "ram_used_gb": 30.558856964111328,
- "gpu_memory_used": 1267.0,
- "relative_time": 201.62405467033386
- },
- {
- "timestamp": "2025-01-04T01:33:49.630725",
- "cpu_percent": 53.47,
- "ram_percent": 48.1,
- "ram_used_gb": 30.59896469116211,
- "gpu_memory_used": 1283.0,
- "relative_time": 202.70162987709045
- },
- {
- "timestamp": "2025-01-04T01:33:50.709226",
- "cpu_percent": 44.74,
- "ram_percent": 48.1,
- "ram_used_gb": 30.581470489501953,
- "gpu_memory_used": 1281.0,
- "relative_time": 203.78962469100952
- },
- {
- "timestamp": "2025-01-04T01:33:51.782302",
- "cpu_percent": 43.4,
- "ram_percent": 48.1,
- "ram_used_gb": 30.582977294921875,
- "gpu_memory_used": 1282.0,
- "relative_time": 204.87054562568665
- },
- {
- "timestamp": "2025-01-04T01:33:52.868020",
- "cpu_percent": 51.75,
- "ram_percent": 48.0,
- "ram_used_gb": 30.540206909179688,
- "gpu_memory_used": 1282.0,
- "relative_time": 205.95602416992188
- },
- {
- "timestamp": "2025-01-04T01:33:53.956023",
- "cpu_percent": 46.36,
- "ram_percent": 48.0,
- "ram_used_gb": 30.562763214111328,
- "gpu_memory_used": 1282.0,
- "relative_time": 207.06639337539673
- },
- {
- "timestamp": "2025-01-04T01:33:55.064043",
- "cpu_percent": 43.91,
- "ram_percent": 48.0,
- "ram_used_gb": 30.560302734375,
- "gpu_memory_used": 1277.0,
- "relative_time": 208.16699743270874
- },
- {
- "timestamp": "2025-01-04T01:33:56.170674",
- "cpu_percent": 50.01,
- "ram_percent": 48.1,
- "ram_used_gb": 30.576671600341797,
- "gpu_memory_used": 1281.0,
- "relative_time": 209.28660559654236
- },
- {
- "timestamp": "2025-01-04T01:33:57.288316",
- "cpu_percent": 50.51,
- "ram_percent": 48.0,
- "ram_used_gb": 30.551471710205078,
- "gpu_memory_used": 1280.0,
- "relative_time": 210.4030442237854
- },
- {
- "timestamp": "2025-01-04T01:33:58.407032",
- "cpu_percent": 49.43,
- "ram_percent": 48.1,
- "ram_used_gb": 30.576725006103516,
- "gpu_memory_used": 1280.0,
- "relative_time": 211.50494027137756
- },
- {
- "timestamp": "2025-01-04T01:33:59.497806",
- "cpu_percent": 46.68,
- "ram_percent": 48.1,
- "ram_used_gb": 30.59314727783203,
- "gpu_memory_used": 1279.0,
- "relative_time": 212.6002950668335
- },
- {
- "timestamp": "2025-01-04T01:34:00.598484",
- "cpu_percent": 57.44,
- "ram_percent": 48.1,
- "ram_used_gb": 30.60983657836914,
- "gpu_memory_used": 1285.0,
- "relative_time": 213.7150914669037
- },
- {
- "timestamp": "2025-01-04T01:34:01.719968",
- "cpu_percent": 54.58,
- "ram_percent": 48.1,
- "ram_used_gb": 30.586456298828125,
- "gpu_memory_used": 1283.0,
- "relative_time": 214.80932760238647
- },
- {
- "timestamp": "2025-01-04T01:34:02.807573",
- "cpu_percent": 61.69,
- "ram_percent": 48.0,
- "ram_used_gb": 30.53356170654297,
- "gpu_memory_used": 1281.0,
- "relative_time": 215.88946890830994
- },
- {
- "timestamp": "2025-01-04T01:34:03.885672",
- "cpu_percent": 49.46,
- "ram_percent": 48.0,
- "ram_used_gb": 30.517364501953125,
- "gpu_memory_used": 1283.0,
- "relative_time": 216.97114062309265
- },
- {
- "timestamp": "2025-01-04T01:34:04.974449",
- "cpu_percent": 42.69,
- "ram_percent": 48.0,
- "ram_used_gb": 30.527969360351562,
- "gpu_memory_used": 1285.0,
- "relative_time": 218.10192775726318
- },
- {
- "timestamp": "2025-01-04T01:34:06.107947",
- "cpu_percent": 54.87,
- "ram_percent": 48.0,
- "ram_used_gb": 30.51028823852539,
- "gpu_memory_used": 1273.0,
- "relative_time": 219.17600679397583
- },
- {
- "timestamp": "2025-01-04T01:34:07.172153",
- "cpu_percent": 45.42,
- "ram_percent": 48.0,
- "ram_used_gb": 30.509258270263672,
- "gpu_memory_used": 1273.0,
- "relative_time": 220.28902983665466
- },
- {
- "timestamp": "2025-01-04T01:34:08.289623",
- "cpu_percent": 52.75,
- "ram_percent": 48.0,
- "ram_used_gb": 30.52011489868164,
- "gpu_memory_used": 1272.0,
- "relative_time": 221.39960098266602
- },
- {
- "timestamp": "2025-01-04T01:34:09.406158",
- "cpu_percent": 52.53,
- "ram_percent": 48.0,
- "ram_used_gb": 30.52783966064453,
- "gpu_memory_used": 1265.0,
- "relative_time": 222.49749565124512
- },
- {
- "timestamp": "2025-01-04T01:34:10.491042",
- "cpu_percent": 56.49,
- "ram_percent": 48.0,
- "ram_used_gb": 30.540733337402344,
- "gpu_memory_used": 1261.0,
- "relative_time": 223.5777132511139
- },
- {
- "timestamp": "2025-01-04T01:34:11.577710",
- "cpu_percent": 44.25,
- "ram_percent": 48.0,
- "ram_used_gb": 30.531757354736328,
- "gpu_memory_used": 1262.0,
- "relative_time": 224.68288159370422
- },
- {
- "timestamp": "2025-01-04T01:34:12.682455",
- "cpu_percent": 47.56,
- "ram_percent": 48.0,
- "ram_used_gb": 30.50157928466797,
- "gpu_memory_used": 1262.0,
- "relative_time": 225.78287291526794
- },
- {
- "timestamp": "2025-01-04T01:34:13.782976",
- "cpu_percent": 48.52,
- "ram_percent": 48.0,
- "ram_used_gb": 30.507736206054688,
- "gpu_memory_used": 1262.0,
- "relative_time": 226.8910207748413
- },
- {
- "timestamp": "2025-01-04T01:34:14.884200",
- "cpu_percent": 49.89,
- "ram_percent": 48.0,
- "ram_used_gb": 30.50653076171875,
- "gpu_memory_used": 1263.0,
- "relative_time": 228.04418087005615
- },
- {
- "timestamp": "2025-01-04T01:34:16.051189",
- "cpu_percent": 49.34,
- "ram_percent": 48.0,
- "ram_used_gb": 30.504470825195312,
- "gpu_memory_used": 1263.0,
- "relative_time": 229.13680815696716
- },
- {
- "timestamp": "2025-01-04T01:34:17.136588",
- "cpu_percent": 47.8,
- "ram_percent": 47.9,
- "ram_used_gb": 30.496841430664062,
- "gpu_memory_used": 1263.0,
- "relative_time": 230.26778984069824
- },
- {
- "timestamp": "2025-01-04T01:34:18.269616",
- "cpu_percent": 48.23,
- "ram_percent": 48.0,
- "ram_used_gb": 30.50909423828125,
- "gpu_memory_used": 1262.0,
- "relative_time": 231.3880865573883
- },
- {
- "timestamp": "2025-01-04T01:34:19.387759",
- "cpu_percent": 42.46,
- "ram_percent": 48.0,
- "ram_used_gb": 30.525142669677734,
- "gpu_memory_used": 1262.0,
- "relative_time": 232.4770486354828
- },
- {
- "timestamp": "2025-01-04T01:34:20.471629",
- "cpu_percent": 44.17,
- "ram_percent": 48.0,
- "ram_used_gb": 30.535388946533203,
- "gpu_memory_used": 1259.0,
- "relative_time": 233.57954168319702
- },
- {
- "timestamp": "2025-01-04T01:34:21.576615",
- "cpu_percent": 45.36,
- "ram_percent": 48.0,
- "ram_used_gb": 30.529708862304688,
- "gpu_memory_used": 1259.0,
- "relative_time": 234.70528435707092
- },
- {
- "timestamp": "2025-01-04T01:34:22.709825",
- "cpu_percent": 52.14,
- "ram_percent": 47.9,
- "ram_used_gb": 30.490406036376953,
- "gpu_memory_used": 1259.0,
- "relative_time": 235.84367108345032
- },
- {
- "timestamp": "2025-01-04T01:34:23.834912",
- "cpu_percent": 49.39,
- "ram_percent": 47.9,
- "ram_used_gb": 30.49042510986328,
- "gpu_memory_used": 1259.0,
- "relative_time": 236.94777131080627
- },
- {
- "timestamp": "2025-01-04T01:34:24.940884",
- "cpu_percent": 51.84,
- "ram_percent": 47.9,
- "ram_used_gb": 30.489459991455078,
- "gpu_memory_used": 1259.0,
- "relative_time": 238.07107305526733
- },
- {
- "timestamp": "2025-01-04T01:34:26.077527",
- "cpu_percent": 49.55,
- "ram_percent": 47.9,
- "ram_used_gb": 30.488842010498047,
- "gpu_memory_used": 1259.0,
- "relative_time": 239.20314645767212
- },
- {
- "timestamp": "2025-01-04T01:34:27.199360",
- "cpu_percent": 47.71,
- "ram_percent": 47.9,
- "ram_used_gb": 30.49380874633789,
- "gpu_memory_used": 1258.0,
- "relative_time": 240.32860612869263
- },
- {
- "timestamp": "2025-01-04T01:34:28.333600",
- "cpu_percent": 48.61,
- "ram_percent": 48.0,
- "ram_used_gb": 30.503887176513672,
- "gpu_memory_used": 1258.0,
- "relative_time": 241.44983053207397
- },
- {
- "timestamp": "2025-01-04T01:34:29.453855",
- "cpu_percent": 51.01,
- "ram_percent": 48.0,
- "ram_used_gb": 30.512046813964844,
- "gpu_memory_used": 1258.0,
- "relative_time": 242.60703372955322
- },
- {
- "timestamp": "2025-01-04T01:34:30.613699",
- "cpu_percent": 53.89,
- "ram_percent": 48.0,
- "ram_used_gb": 30.522415161132812,
- "gpu_memory_used": 1258.0,
- "relative_time": 243.73219799995422
- },
- {
- "timestamp": "2025-01-04T01:34:31.735503",
- "cpu_percent": 21.25,
- "ram_percent": 48.2,
- "ram_used_gb": 30.68771743774414,
- "gpu_memory_used": 1260.0,
- "relative_time": 244.80069231987
- }
- ],
- "test_duration": 247.14976453781128
-}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt
deleted file mode 100644
index 541a304..0000000
--- a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-=== Benchmark Statistics (with correct RTF) ===
-
-Total tokens processed: 1800
-Total audio generated (s): 568.53
-Total test duration (s): 244.10
-Average processing rate (tokens/s): 7.34
-Average RTF: 0.43
-Average Real Time Speed: 2.33
-
-=== Per-chunk Stats ===
-
-Average chunk size (tokens): 600.00
-Min chunk size (tokens): 300
-Max chunk size (tokens): 900
-Average processing time (s): 81.30
-Average output length (s): 189.51
-
-=== Performance Ranges ===
-
-Processing rate range (tokens/s): 7.21 - 7.47
-RTF range: 0.43x - 0.43x
-Real Time Speed range: 2.33x - 2.33x
-
diff --git a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark.json b/examples/assorted_checks/benchmarks/output_data/first_token_benchmark.json
deleted file mode 100644
index ae10c23..0000000
--- a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark.json
+++ /dev/null
@@ -1,403 +0,0 @@
-{
- "individual_runs": [
- {
- "text_length": 37,
- "token_count": 10,
- "total_time": 0.16574740409851074,
- "time_to_first_chunk": 0.16574740409851074,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens10_run1.wav",
- "audio_length": 3.45,
- "target_tokens": 10,
- "actual_tokens": 10,
- "run_number": 1
- },
- {
- "text_length": 37,
- "token_count": 10,
- "total_time": 0.18812799453735352,
- "time_to_first_chunk": 0.18812799453735352,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens10_run2.wav",
- "audio_length": 3.45,
- "target_tokens": 10,
- "actual_tokens": 10,
- "run_number": 2
- },
- {
- "text_length": 37,
- "token_count": 10,
- "total_time": 0.18645429611206055,
- "time_to_first_chunk": 0.18645429611206055,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens10_run3.wav",
- "audio_length": 3.45,
- "target_tokens": 10,
- "actual_tokens": 10,
- "run_number": 3
- },
- {
- "text_length": 37,
- "token_count": 10,
- "total_time": 0.17632031440734863,
- "time_to_first_chunk": 0.17632031440734863,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens10_run4.wav",
- "audio_length": 3.45,
- "target_tokens": 10,
- "actual_tokens": 10,
- "run_number": 4
- },
- {
- "text_length": 37,
- "token_count": 10,
- "total_time": 0.13381195068359375,
- "time_to_first_chunk": 0.13381195068359375,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens10_run5.wav",
- "audio_length": 3.45,
- "target_tokens": 10,
- "actual_tokens": 10,
- "run_number": 5
- },
- {
- "text_length": 102,
- "token_count": 25,
- "total_time": 0.2086498737335205,
- "time_to_first_chunk": 0.2086498737335205,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens25_run1.wav",
- "audio_length": 7.225,
- "target_tokens": 25,
- "actual_tokens": 25,
- "run_number": 1
- },
- {
- "text_length": 102,
- "token_count": 25,
- "total_time": 0.2727653980255127,
- "time_to_first_chunk": 0.2727653980255127,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens25_run2.wav",
- "audio_length": 7.225,
- "target_tokens": 25,
- "actual_tokens": 25,
- "run_number": 2
- },
- {
- "text_length": 102,
- "token_count": 25,
- "total_time": 0.2096250057220459,
- "time_to_first_chunk": 0.2096250057220459,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens25_run3.wav",
- "audio_length": 7.225,
- "target_tokens": 25,
- "actual_tokens": 25,
- "run_number": 3
- },
- {
- "text_length": 102,
- "token_count": 25,
- "total_time": 0.2256758213043213,
- "time_to_first_chunk": 0.2256758213043213,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens25_run4.wav",
- "audio_length": 7.225,
- "target_tokens": 25,
- "actual_tokens": 25,
- "run_number": 4
- },
- {
- "text_length": 102,
- "token_count": 25,
- "total_time": 0.1945042610168457,
- "time_to_first_chunk": 0.1945042610168457,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens25_run5.wav",
- "audio_length": 7.225,
- "target_tokens": 25,
- "actual_tokens": 25,
- "run_number": 5
- },
- {
- "text_length": 212,
- "token_count": 50,
- "total_time": 0.4975121021270752,
- "time_to_first_chunk": 0.4975121021270752,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens50_run1.wav",
- "audio_length": 16.325,
- "target_tokens": 50,
- "actual_tokens": 50,
- "run_number": 1
- },
- {
- "text_length": 212,
- "token_count": 50,
- "total_time": 0.4518404006958008,
- "time_to_first_chunk": 0.4518404006958008,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens50_run2.wav",
- "audio_length": 16.325,
- "target_tokens": 50,
- "actual_tokens": 50,
- "run_number": 2
- },
- {
- "text_length": 212,
- "token_count": 50,
- "total_time": 0.5640325546264648,
- "time_to_first_chunk": 0.5640325546264648,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens50_run3.wav",
- "audio_length": 16.325,
- "target_tokens": 50,
- "actual_tokens": 50,
- "run_number": 3
- },
- {
- "text_length": 212,
- "token_count": 50,
- "total_time": 0.5305957794189453,
- "time_to_first_chunk": 0.5305957794189453,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens50_run4.wav",
- "audio_length": 16.325,
- "target_tokens": 50,
- "actual_tokens": 50,
- "run_number": 4
- },
- {
- "text_length": 212,
- "token_count": 50,
- "total_time": 0.5540030002593994,
- "time_to_first_chunk": 0.5540030002593994,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens50_run5.wav",
- "audio_length": 16.325,
- "target_tokens": 50,
- "actual_tokens": 50,
- "run_number": 5
- },
- {
- "text_length": 448,
- "token_count": 100,
- "total_time": 0.7963137626647949,
- "time_to_first_chunk": 0.7963137626647949,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens100_run1.wav",
- "audio_length": 31.1,
- "target_tokens": 100,
- "actual_tokens": 100,
- "run_number": 1
- },
- {
- "text_length": 448,
- "token_count": 100,
- "total_time": 0.9320805072784424,
- "time_to_first_chunk": 0.9320805072784424,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens100_run2.wav",
- "audio_length": 31.1,
- "target_tokens": 100,
- "actual_tokens": 100,
- "run_number": 2
- },
- {
- "text_length": 448,
- "token_count": 100,
- "total_time": 0.824256181716919,
- "time_to_first_chunk": 0.824256181716919,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens100_run3.wav",
- "audio_length": 31.1,
- "target_tokens": 100,
- "actual_tokens": 100,
- "run_number": 3
- },
- {
- "text_length": 448,
- "token_count": 100,
- "total_time": 0.9034836292266846,
- "time_to_first_chunk": 0.9034836292266846,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens100_run4.wav",
- "audio_length": 31.1,
- "target_tokens": 100,
- "actual_tokens": 100,
- "run_number": 4
- },
- {
- "text_length": 448,
- "token_count": 100,
- "total_time": 0.8364357948303223,
- "time_to_first_chunk": 0.8364357948303223,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens100_run5.wav",
- "audio_length": 31.1,
- "target_tokens": 100,
- "actual_tokens": 100,
- "run_number": 5
- },
- {
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.8122682571411133,
- "time_to_first_chunk": 1.8122682571411133,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens200_run1.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
- "run_number": 1
- },
- {
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.7290427684783936,
- "time_to_first_chunk": 1.7290427684783936,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens200_run2.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
- "run_number": 2
- },
- {
- "text_length": 906,
- "token_count": 200,
- "total_time": 2.141728401184082,
- "time_to_first_chunk": 2.141728401184082,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens200_run3.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
- "run_number": 3
- },
- {
- "text_length": 906,
- "token_count": 200,
- "total_time": 2.0155680179595947,
- "time_to_first_chunk": 2.0155680179595947,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens200_run4.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
- "run_number": 4
- },
- {
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.8707575798034668,
- "time_to_first_chunk": 1.8707575798034668,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens200_run5.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
- "run_number": 5
- },
- {
- "text_length": 2232,
- "token_count": 500,
- "total_time": 4.822713851928711,
- "time_to_first_chunk": 4.822713851928711,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens500_run1.wav",
- "audio_length": 157.875,
- "target_tokens": 500,
- "actual_tokens": 500,
- "run_number": 1
- },
- {
- "text_length": 2232,
- "token_count": 500,
- "total_time": 4.227782726287842,
- "time_to_first_chunk": 4.227782726287842,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens500_run2.wav",
- "audio_length": 157.875,
- "target_tokens": 500,
- "actual_tokens": 500,
- "run_number": 2
- },
- {
- "text_length": 2232,
- "token_count": 500,
- "total_time": 4.414916276931763,
- "time_to_first_chunk": 4.414916276931763,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens500_run3.wav",
- "audio_length": 157.875,
- "target_tokens": 500,
- "actual_tokens": 500,
- "run_number": 3
- },
- {
- "text_length": 2232,
- "token_count": 500,
- "total_time": 4.579505681991577,
- "time_to_first_chunk": 4.579505681991577,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens500_run4.wav",
- "audio_length": 157.875,
- "target_tokens": 500,
- "actual_tokens": 500,
- "run_number": 4
- },
- {
- "text_length": 2232,
- "token_count": 500,
- "total_time": 4.332529067993164,
- "time_to_first_chunk": 4.332529067993164,
- "error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio\\benchmark_tokens500_run5.wav",
- "audio_length": 157.875,
- "target_tokens": 500,
- "actual_tokens": 500,
- "run_number": 5
- }
- ],
- "summary": {
- "10": {
- "avg_time_to_first_chunk": 0.17,
- "avg_total_time": 0.17,
- "avg_audio_length": 3.45,
- "num_successful_runs": 5
- },
- "25": {
- "avg_time_to_first_chunk": 0.222,
- "avg_total_time": 0.222,
- "avg_audio_length": 7.225,
- "num_successful_runs": 5
- },
- "50": {
- "avg_time_to_first_chunk": 0.52,
- "avg_total_time": 0.52,
- "avg_audio_length": 16.325,
- "num_successful_runs": 5
- },
- "100": {
- "avg_time_to_first_chunk": 0.859,
- "avg_total_time": 0.859,
- "avg_audio_length": 31.1,
- "num_successful_runs": 5
- },
- "200": {
- "avg_time_to_first_chunk": 1.914,
- "avg_total_time": 1.914,
- "avg_audio_length": 62.625,
- "num_successful_runs": 5
- },
- "500": {
- "avg_time_to_first_chunk": 4.475,
- "avg_total_time": 4.475,
- "avg_audio_length": 157.875,
- "num_successful_runs": 5
- }
- },
- "timestamp": "2025-01-04 13:52:28"
-}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream.json b/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream.json
index c8bb092..7501fb9 100644
--- a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream.json
+++ b/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream.json
@@ -1,271 +1,337 @@
{
"individual_runs": [
{
- "text_length": 212,
- "token_count": 50,
- "total_time": 0.7278211116790771,
- "time_to_first_chunk": 0.3613290786743164,
+ "text_length": 37,
+ "token_count": null,
+ "total_time": 0.4376556873321533,
+ "time_to_first_chunk": 0.4189143180847168,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run1_stream.wav",
- "audio_length": 16.325,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens10_run1_stream.wav",
+ "audio_length": 3.45,
+ "target_tokens": 10,
+ "actual_tokens": 10,
+ "run_number": 1
+ },
+ {
+ "text_length": 37,
+ "token_count": null,
+ "total_time": 0.37163758277893066,
+ "time_to_first_chunk": 0.34892702102661133,
+ "error": null,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens10_run2_stream.wav",
+ "audio_length": 3.45,
+ "target_tokens": 10,
+ "actual_tokens": 10,
+ "run_number": 2
+ },
+ {
+ "text_length": 37,
+ "token_count": null,
+ "total_time": 0.2654602527618408,
+ "time_to_first_chunk": 0.2409076690673828,
+ "error": null,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens10_run3_stream.wav",
+ "audio_length": 3.45,
+ "target_tokens": 10,
+ "actual_tokens": 10,
+ "run_number": 3
+ },
+ {
+ "text_length": 37,
+ "token_count": null,
+ "total_time": 0.24376440048217773,
+ "time_to_first_chunk": 0.23003816604614258,
+ "error": null,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens10_run4_stream.wav",
+ "audio_length": 3.45,
+ "target_tokens": 10,
+ "actual_tokens": 10,
+ "run_number": 4
+ },
+ {
+ "text_length": 37,
+ "token_count": null,
+ "total_time": 0.25968003273010254,
+ "time_to_first_chunk": 0.24081206321716309,
+ "error": null,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens10_run5_stream.wav",
+ "audio_length": 3.45,
+ "target_tokens": 10,
+ "actual_tokens": 10,
+ "run_number": 5
+ },
+ {
+ "text_length": 212,
+ "token_count": null,
+ "total_time": 1.049060344696045,
+ "time_to_first_chunk": 0.3336215019226074,
+ "error": null,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run1_stream.wav",
+ "audio_length": 15.925,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 1
},
{
"text_length": 212,
- "token_count": 50,
- "total_time": 0.4556088447570801,
- "time_to_first_chunk": 0.18642044067382812,
+ "token_count": null,
+ "total_time": 0.8934676647186279,
+ "time_to_first_chunk": 0.3011031150817871,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run2_stream.wav",
- "audio_length": 16.325,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run2_stream.wav",
+ "audio_length": 15.925,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 2
},
{
"text_length": 212,
- "token_count": 50,
- "total_time": 0.5538768768310547,
- "time_to_first_chunk": 0.2720797061920166,
+ "token_count": null,
+ "total_time": 0.9444286823272705,
+ "time_to_first_chunk": 0.3198091983795166,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run3_stream.wav",
- "audio_length": 16.325,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run3_stream.wav",
+ "audio_length": 15.925,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 3
},
{
"text_length": 212,
- "token_count": 50,
- "total_time": 0.4395604133605957,
- "time_to_first_chunk": 0.15613913536071777,
+ "token_count": null,
+ "total_time": 0.9735183715820312,
+ "time_to_first_chunk": 0.369948148727417,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run4_stream.wav",
- "audio_length": 16.325,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run4_stream.wav",
+ "audio_length": 15.925,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 4
},
{
"text_length": 212,
- "token_count": 50,
- "total_time": 0.45748305320739746,
- "time_to_first_chunk": 0.18805718421936035,
+ "token_count": null,
+ "total_time": 0.8089118003845215,
+ "time_to_first_chunk": 0.30179858207702637,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run5_stream.wav",
- "audio_length": 16.325,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens50_run5_stream.wav",
+ "audio_length": 15.925,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 5
},
{
"text_length": 448,
- "token_count": 100,
- "total_time": 0.7347762584686279,
- "time_to_first_chunk": 0.16963744163513184,
+ "token_count": null,
+ "total_time": 1.641003131866455,
+ "time_to_first_chunk": 0.2979745864868164,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run1_stream.wav",
- "audio_length": 31.1,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run1_stream.wav",
+ "audio_length": 30.5,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 1
},
{
"text_length": 448,
- "token_count": 100,
- "total_time": 0.8288509845733643,
- "time_to_first_chunk": 0.20123004913330078,
+ "token_count": null,
+ "total_time": 1.3709619045257568,
+ "time_to_first_chunk": 0.4272146224975586,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run2_stream.wav",
- "audio_length": 31.1,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run2_stream.wav",
+ "audio_length": 30.5,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 2
},
{
"text_length": 448,
- "token_count": 100,
- "total_time": 0.7503848075866699,
- "time_to_first_chunk": 0.21662068367004395,
+ "token_count": null,
+ "total_time": 1.2554471492767334,
+ "time_to_first_chunk": 0.29790568351745605,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run3_stream.wav",
- "audio_length": 31.1,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run3_stream.wav",
+ "audio_length": 30.5,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 3
},
{
"text_length": 448,
- "token_count": 100,
- "total_time": 0.694899320602417,
- "time_to_first_chunk": 0.1966841220855713,
+ "token_count": null,
+ "total_time": 1.3761844635009766,
+ "time_to_first_chunk": 0.32633328437805176,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run4_stream.wav",
- "audio_length": 31.1,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run4_stream.wav",
+ "audio_length": 30.5,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 4
},
{
"text_length": 448,
- "token_count": 100,
- "total_time": 0.68701171875,
- "time_to_first_chunk": 0.19341063499450684,
+ "token_count": null,
+ "total_time": 1.56705904006958,
+ "time_to_first_chunk": 0.32801246643066406,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run5_stream.wav",
- "audio_length": 31.1,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens100_run5_stream.wav",
+ "audio_length": 30.5,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 5
},
{
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.6845426559448242,
- "time_to_first_chunk": 0.21096158027648926,
+ "text_length": 1140,
+ "token_count": null,
+ "total_time": 5.086699962615967,
+ "time_to_first_chunk": 0.33925390243530273,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run1_stream.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens250_run1_stream.wav",
+ "audio_length": 78.775,
+ "target_tokens": 250,
+ "actual_tokens": 250,
"run_number": 1
},
{
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.3545098304748535,
- "time_to_first_chunk": 0.18648386001586914,
+ "text_length": 1140,
+ "token_count": null,
+ "total_time": 3.827953338623047,
+ "time_to_first_chunk": 0.39266157150268555,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run2_stream.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens250_run2_stream.wav",
+ "audio_length": 78.775,
+ "target_tokens": 250,
+ "actual_tokens": 250,
"run_number": 2
},
{
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.426060676574707,
- "time_to_first_chunk": 0.20081472396850586,
+ "text_length": 1140,
+ "token_count": null,
+ "total_time": 3.9389824867248535,
+ "time_to_first_chunk": 0.3231511116027832,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run3_stream.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens250_run3_stream.wav",
+ "audio_length": 78.775,
+ "target_tokens": 250,
+ "actual_tokens": 250,
"run_number": 3
},
{
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.4084081649780273,
- "time_to_first_chunk": 0.18551135063171387,
+ "text_length": 1140,
+ "token_count": null,
+ "total_time": 3.942399740219116,
+ "time_to_first_chunk": 0.34731340408325195,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run4_stream.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens250_run4_stream.wav",
+ "audio_length": 78.775,
+ "target_tokens": 250,
+ "actual_tokens": 250,
"run_number": 4
},
{
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.4703152179718018,
- "time_to_first_chunk": 0.17750859260559082,
+ "text_length": 1140,
+ "token_count": null,
+ "total_time": 3.7748308181762695,
+ "time_to_first_chunk": 0.40787601470947266,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens200_run5_stream.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens250_run5_stream.wav",
+ "audio_length": 78.775,
+ "target_tokens": 250,
+ "actual_tokens": 250,
"run_number": 5
},
{
"text_length": 2232,
- "token_count": 500,
- "total_time": 4.289574384689331,
- "time_to_first_chunk": 0.1997976303100586,
+ "token_count": null,
+ "total_time": 9.003147840499878,
+ "time_to_first_chunk": 0.5455703735351562,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run1_stream.wav",
- "audio_length": 157.875,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run1_stream.wav",
+ "audio_length": 156.475,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 1
},
{
"text_length": 2232,
- "token_count": 500,
- "total_time": 3.7089381217956543,
- "time_to_first_chunk": 0.25969815254211426,
+ "token_count": null,
+ "total_time": 10.081491231918335,
+ "time_to_first_chunk": 0.4591703414916992,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run2_stream.wav",
- "audio_length": 157.875,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run2_stream.wav",
+ "audio_length": 156.475,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 2
},
{
"text_length": 2232,
- "token_count": 500,
- "total_time": 4.138366222381592,
- "time_to_first_chunk": 0.1831505298614502,
+ "token_count": null,
+ "total_time": 9.767668962478638,
+ "time_to_first_chunk": 0.31237053871154785,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run3_stream.wav",
- "audio_length": 157.875,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run3_stream.wav",
+ "audio_length": 156.475,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 3
},
{
"text_length": 2232,
- "token_count": 500,
- "total_time": 3.980635643005371,
- "time_to_first_chunk": 0.20493030548095703,
+ "token_count": null,
+ "total_time": 9.090342998504639,
+ "time_to_first_chunk": 0.41753244400024414,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run4_stream.wav",
- "audio_length": 157.875,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run4_stream.wav",
+ "audio_length": 156.475,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 4
},
{
"text_length": 2232,
- "token_count": 500,
- "total_time": 4.1370298862457275,
- "time_to_first_chunk": 0.19150757789611816,
+ "token_count": null,
+ "total_time": 9.876578330993652,
+ "time_to_first_chunk": 0.3965120315551758,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run5_stream.wav",
- "audio_length": 157.875,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream\\benchmark_tokens500_run5_stream.wav",
+ "audio_length": 156.475,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 5
}
],
"summary": {
+ "10": {
+ "avg_time_to_first_chunk": 0.296,
+ "avg_total_time": 0.316,
+ "avg_audio_length": 3.45,
+ "num_successful_runs": 5
+ },
"50": {
- "avg_time_to_first_chunk": 0.233,
- "avg_total_time": 0.527,
- "avg_audio_length": 16.325,
+ "avg_time_to_first_chunk": 0.325,
+ "avg_total_time": 0.934,
+ "avg_audio_length": 15.925,
"num_successful_runs": 5
},
"100": {
- "avg_time_to_first_chunk": 0.196,
- "avg_total_time": 0.739,
- "avg_audio_length": 31.1,
+ "avg_time_to_first_chunk": 0.335,
+ "avg_total_time": 1.442,
+ "avg_audio_length": 30.5,
"num_successful_runs": 5
},
- "200": {
- "avg_time_to_first_chunk": 0.192,
- "avg_total_time": 1.469,
- "avg_audio_length": 62.625,
+ "250": {
+ "avg_time_to_first_chunk": 0.362,
+ "avg_total_time": 4.114,
+ "avg_audio_length": 78.775,
"num_successful_runs": 5
},
"500": {
- "avg_time_to_first_chunk": 0.208,
- "avg_total_time": 4.051,
- "avg_audio_length": 157.875,
+ "avg_time_to_first_chunk": 0.426,
+ "avg_total_time": 9.564,
+ "avg_audio_length": 156.475,
"num_successful_runs": 5
}
},
- "timestamp": "2025-01-04 22:16:30"
+ "timestamp": "2025-01-06 00:00:43"
}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream_openai.json b/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream_openai.json
index b996231..179998d 100644
--- a/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream_openai.json
+++ b/examples/assorted_checks/benchmarks/output_data/first_token_benchmark_stream_openai.json
@@ -1,271 +1,337 @@
{
"individual_runs": [
{
- "text_length": 212,
- "token_count": 50,
- "total_time": 1.149611473083496,
- "time_to_first_chunk": 0.8767304420471191,
+ "text_length": 37,
+ "token_count": null,
+ "total_time": 0.7105245590209961,
+ "time_to_first_chunk": 0.6905441284179688,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run1_stream_openai.wav",
- "audio_length": 16.325,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens10_run1_stream_openai.wav",
+ "audio_length": 3.45,
+ "target_tokens": 10,
+ "actual_tokens": 10,
+ "run_number": 1
+ },
+ {
+ "text_length": 37,
+ "token_count": null,
+ "total_time": 0.35063982009887695,
+ "time_to_first_chunk": 0.32647228240966797,
+ "error": null,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens10_run2_stream_openai.wav",
+ "audio_length": 3.45,
+ "target_tokens": 10,
+ "actual_tokens": 10,
+ "run_number": 2
+ },
+ {
+ "text_length": 37,
+ "token_count": null,
+ "total_time": 0.43519043922424316,
+ "time_to_first_chunk": 0.41011548042297363,
+ "error": null,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens10_run3_stream_openai.wav",
+ "audio_length": 3.45,
+ "target_tokens": 10,
+ "actual_tokens": 10,
+ "run_number": 3
+ },
+ {
+ "text_length": 37,
+ "token_count": null,
+ "total_time": 0.33886170387268066,
+ "time_to_first_chunk": 0.32068943977355957,
+ "error": null,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens10_run4_stream_openai.wav",
+ "audio_length": 3.45,
+ "target_tokens": 10,
+ "actual_tokens": 10,
+ "run_number": 4
+ },
+ {
+ "text_length": 37,
+ "token_count": null,
+ "total_time": 0.31725525856018066,
+ "time_to_first_chunk": 0.29624342918395996,
+ "error": null,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens10_run5_stream_openai.wav",
+ "audio_length": 3.45,
+ "target_tokens": 10,
+ "actual_tokens": 10,
+ "run_number": 5
+ },
+ {
+ "text_length": 212,
+ "token_count": null,
+ "total_time": 1.0215234756469727,
+ "time_to_first_chunk": 0.38323354721069336,
+ "error": null,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run1_stream_openai.wav",
+ "audio_length": 15.925,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 1
},
{
"text_length": 212,
- "token_count": 50,
- "total_time": 0.9325947761535645,
- "time_to_first_chunk": 0.5965914726257324,
+ "token_count": null,
+ "total_time": 1.38511061668396,
+ "time_to_first_chunk": 0.47052764892578125,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run2_stream_openai.wav",
- "audio_length": 16.325,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run2_stream_openai.wav",
+ "audio_length": 15.925,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 2
},
{
"text_length": 212,
- "token_count": 50,
- "total_time": 0.9205234050750732,
- "time_to_first_chunk": 0.5961906909942627,
+ "token_count": null,
+ "total_time": 1.0185234546661377,
+ "time_to_first_chunk": 0.3535764217376709,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run3_stream_openai.wav",
- "audio_length": 16.325,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run3_stream_openai.wav",
+ "audio_length": 15.925,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 3
},
{
"text_length": 212,
- "token_count": 50,
- "total_time": 1.1321916580200195,
- "time_to_first_chunk": 0.6946916580200195,
+ "token_count": null,
+ "total_time": 0.8875925540924072,
+ "time_to_first_chunk": 0.3373105525970459,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run4_stream_openai.wav",
- "audio_length": 16.325,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run4_stream_openai.wav",
+ "audio_length": 15.925,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 4
},
{
"text_length": 212,
- "token_count": 50,
- "total_time": 1.1146185398101807,
- "time_to_first_chunk": 0.6918885707855225,
+ "token_count": null,
+ "total_time": 0.9557526111602783,
+ "time_to_first_chunk": 0.3364882469177246,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run5_stream_openai.wav",
- "audio_length": 16.325,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens50_run5_stream_openai.wav",
+ "audio_length": 15.925,
"target_tokens": 50,
"actual_tokens": 50,
"run_number": 5
},
{
"text_length": 448,
- "token_count": 100,
- "total_time": 1.3645410537719727,
- "time_to_first_chunk": 0.6802399158477783,
+ "token_count": null,
+ "total_time": 1.569596767425537,
+ "time_to_first_chunk": 0.42070746421813965,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run1_stream_openai.wav",
- "audio_length": 31.1,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run1_stream_openai.wav",
+ "audio_length": 30.5,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 1
},
{
"text_length": 448,
- "token_count": 100,
- "total_time": 1.4154777526855469,
- "time_to_first_chunk": 0.7297353744506836,
+ "token_count": null,
+ "total_time": 1.5172030925750732,
+ "time_to_first_chunk": 0.3982264995574951,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run2_stream_openai.wav",
- "audio_length": 31.1,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run2_stream_openai.wav",
+ "audio_length": 30.5,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 2
},
{
"text_length": 448,
- "token_count": 100,
- "total_time": 1.3589520454406738,
- "time_to_first_chunk": 0.698603630065918,
+ "token_count": null,
+ "total_time": 1.5318474769592285,
+ "time_to_first_chunk": 0.3533785343170166,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run3_stream_openai.wav",
- "audio_length": 31.1,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run3_stream_openai.wav",
+ "audio_length": 30.5,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 3
},
{
"text_length": 448,
- "token_count": 100,
- "total_time": 1.2276430130004883,
- "time_to_first_chunk": 0.6705801486968994,
+ "token_count": null,
+ "total_time": 1.3858752250671387,
+ "time_to_first_chunk": 0.3360786437988281,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run4_stream_openai.wav",
- "audio_length": 31.1,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run4_stream_openai.wav",
+ "audio_length": 30.5,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 4
},
{
"text_length": 448,
- "token_count": 100,
- "total_time": 1.0949454307556152,
- "time_to_first_chunk": 0.5698442459106445,
+ "token_count": null,
+ "total_time": 1.7841475009918213,
+ "time_to_first_chunk": 0.34446048736572266,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run5_stream_openai.wav",
- "audio_length": 31.1,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens100_run5_stream_openai.wav",
+ "audio_length": 30.5,
"target_tokens": 100,
"actual_tokens": 100,
"run_number": 5
},
{
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.8211240768432617,
- "time_to_first_chunk": 0.6070489883422852,
+ "text_length": 1140,
+ "token_count": null,
+ "total_time": 4.334965467453003,
+ "time_to_first_chunk": 0.4336512088775635,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run1_stream_openai.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens250_run1_stream_openai.wav",
+ "audio_length": 78.775,
+ "target_tokens": 250,
+ "actual_tokens": 250,
"run_number": 1
},
{
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.8376774787902832,
- "time_to_first_chunk": 0.6538689136505127,
+ "text_length": 1140,
+ "token_count": null,
+ "total_time": 5.265941858291626,
+ "time_to_first_chunk": 0.5461773872375488,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run2_stream_openai.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens250_run2_stream_openai.wav",
+ "audio_length": 78.775,
+ "target_tokens": 250,
+ "actual_tokens": 250,
"run_number": 2
},
{
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.6953792572021484,
- "time_to_first_chunk": 0.5554308891296387,
+ "text_length": 1140,
+ "token_count": null,
+ "total_time": 5.66066575050354,
+ "time_to_first_chunk": 0.4757547378540039,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run3_stream_openai.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens250_run3_stream_openai.wav",
+ "audio_length": 78.775,
+ "target_tokens": 250,
+ "actual_tokens": 250,
"run_number": 3
},
{
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.887030839920044,
- "time_to_first_chunk": 0.5866930484771729,
+ "text_length": 1140,
+ "token_count": null,
+ "total_time": 9.289174318313599,
+ "time_to_first_chunk": 0.40159058570861816,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run4_stream_openai.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens250_run4_stream_openai.wav",
+ "audio_length": 78.775,
+ "target_tokens": 250,
+ "actual_tokens": 250,
"run_number": 4
},
{
- "text_length": 906,
- "token_count": 200,
- "total_time": 1.7908406257629395,
- "time_to_first_chunk": 0.5897490978240967,
+ "text_length": 1140,
+ "token_count": null,
+ "total_time": 4.425869703292847,
+ "time_to_first_chunk": 0.40808558464050293,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens200_run5_stream_openai.wav",
- "audio_length": 62.625,
- "target_tokens": 200,
- "actual_tokens": 200,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens250_run5_stream_openai.wav",
+ "audio_length": 78.775,
+ "target_tokens": 250,
+ "actual_tokens": 250,
"run_number": 5
},
{
"text_length": 2232,
- "token_count": 500,
- "total_time": 4.228837013244629,
- "time_to_first_chunk": 0.5315976142883301,
+ "token_count": null,
+ "total_time": 9.600461483001709,
+ "time_to_first_chunk": 0.3966805934906006,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run1_stream_openai.wav",
- "audio_length": 157.875,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run1_stream_openai.wav",
+ "audio_length": 156.475,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 1
},
{
"text_length": 2232,
- "token_count": 500,
- "total_time": 4.489210367202759,
- "time_to_first_chunk": 0.5261838436126709,
+ "token_count": null,
+ "total_time": 8.82239580154419,
+ "time_to_first_chunk": 0.3900904655456543,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run2_stream_openai.wav",
- "audio_length": 157.875,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run2_stream_openai.wav",
+ "audio_length": 156.475,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 2
},
{
"text_length": 2232,
- "token_count": 500,
- "total_time": 4.5290446281433105,
- "time_to_first_chunk": 0.6186764240264893,
+ "token_count": null,
+ "total_time": 10.99152159690857,
+ "time_to_first_chunk": 0.4041757583618164,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run3_stream_openai.wav",
- "audio_length": 157.875,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run3_stream_openai.wav",
+ "audio_length": 156.475,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 3
},
{
"text_length": 2232,
- "token_count": 500,
- "total_time": 4.209261178970337,
- "time_to_first_chunk": 0.5990591049194336,
+ "token_count": null,
+ "total_time": 9.12995958328247,
+ "time_to_first_chunk": 0.43430614471435547,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run4_stream_openai.wav",
- "audio_length": 157.875,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run4_stream_openai.wav",
+ "audio_length": 156.475,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 4
},
{
"text_length": 2232,
- "token_count": 500,
- "total_time": 4.218762636184692,
- "time_to_first_chunk": 0.5466251373291016,
+ "token_count": null,
+ "total_time": 10.043727159500122,
+ "time_to_first_chunk": 0.41181445121765137,
"error": null,
- "audio_path": "c:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run5_stream_openai.wav",
- "audio_length": 157.875,
+ "audio_path": "C:\\Users\\jerem\\Desktop\\Kokoro-FastAPI\\examples\\assorted_checks\\benchmarks\\output_audio_stream_openai\\benchmark_tokens500_run5_stream_openai.wav",
+ "audio_length": 156.475,
"target_tokens": 500,
"actual_tokens": 500,
"run_number": 5
}
],
"summary": {
+ "10": {
+ "avg_time_to_first_chunk": 0.409,
+ "avg_total_time": 0.43,
+ "avg_audio_length": 3.45,
+ "num_successful_runs": 5
+ },
"50": {
- "avg_time_to_first_chunk": 0.691,
- "avg_total_time": 1.05,
- "avg_audio_length": 16.325,
+ "avg_time_to_first_chunk": 0.376,
+ "avg_total_time": 1.054,
+ "avg_audio_length": 15.925,
"num_successful_runs": 5
},
"100": {
- "avg_time_to_first_chunk": 0.67,
- "avg_total_time": 1.292,
- "avg_audio_length": 31.1,
+ "avg_time_to_first_chunk": 0.371,
+ "avg_total_time": 1.558,
+ "avg_audio_length": 30.5,
"num_successful_runs": 5
},
- "200": {
- "avg_time_to_first_chunk": 0.599,
- "avg_total_time": 1.806,
- "avg_audio_length": 62.625,
+ "250": {
+ "avg_time_to_first_chunk": 0.453,
+ "avg_total_time": 5.795,
+ "avg_audio_length": 78.775,
"num_successful_runs": 5
},
"500": {
- "avg_time_to_first_chunk": 0.564,
- "avg_total_time": 4.335,
- "avg_audio_length": 157.875,
+ "avg_time_to_first_chunk": 0.407,
+ "avg_total_time": 9.718,
+ "avg_audio_length": 156.475,
"num_successful_runs": 5
}
},
- "timestamp": "2025-01-04 22:18:03"
+ "timestamp": "2025-01-06 00:02:21"
}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json
index ccac37e..5a44ee5 100644
--- a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json
+++ b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json
@@ -2,1252 +2,592 @@
"results": [
{
"tokens": 150,
- "processing_time": 1.86,
- "output_length": 45.9,
- "rtf": 0.04,
- "elapsed_time": 1.92
+ "processing_time": 2.8,
+ "output_length": 49.7,
+ "rtf": 0.06,
+ "elapsed_time": 2.88591
},
{
"tokens": 300,
- "processing_time": 3.08,
- "output_length": 96.425,
- "rtf": 0.03,
- "elapsed_time": 5.06
+ "processing_time": 6.33,
+ "output_length": 100.225,
+ "rtf": 0.06,
+ "elapsed_time": 9.3164
},
{
"tokens": 450,
- "processing_time": 4.4,
- "output_length": 143.1,
- "rtf": 0.03,
- "elapsed_time": 9.53
+ "processing_time": 11.23,
+ "output_length": 146.9,
+ "rtf": 0.08,
+ "elapsed_time": 20.69256
},
{
"tokens": 600,
- "processing_time": 6.47,
- "output_length": 188.675,
- "rtf": 0.03,
- "elapsed_time": 16.06
+ "processing_time": 14.11,
+ "output_length": 198.7,
+ "rtf": 0.07,
+ "elapsed_time": 34.94436
},
{
"tokens": 750,
- "processing_time": 8.32,
- "output_length": 236.7,
- "rtf": 0.04,
- "elapsed_time": 24.45
+ "processing_time": 14.97,
+ "output_length": 255.05,
+ "rtf": 0.06,
+ "elapsed_time": 50.10276
},
{
"tokens": 900,
- "processing_time": 8.92,
- "output_length": 283.425,
- "rtf": 0.03,
- "elapsed_time": 33.45
- },
- {
- "tokens": 2000,
- "processing_time": 18.55,
- "output_length": 624.325,
- "rtf": 0.03,
- "elapsed_time": 52.14
- },
- {
- "tokens": 3000,
- "processing_time": 23.98,
- "output_length": 931.15,
- "rtf": 0.03,
- "elapsed_time": 76.32
- },
- {
- "tokens": 4000,
- "processing_time": 32.93,
- "output_length": 1222.1,
- "rtf": 0.03,
- "elapsed_time": 109.53
- },
- {
- "tokens": 5000,
- "processing_time": 45.39,
- "output_length": 1524.575,
- "rtf": 0.03,
- "elapsed_time": 155.23
+ "processing_time": 19.96,
+ "output_length": 305.45,
+ "rtf": 0.07,
+ "elapsed_time": 70.19825
}
],
"system_metrics": [
{
- "timestamp": "2025-01-04T02:37:52.172368",
- "cpu_percent": 11.51,
- "ram_percent": 52.8,
- "ram_used_gb": 33.61172866821289,
- "gpu_memory_used": 3216.0,
- "relative_time": 0.08031892776489258
+ "timestamp": "2025-01-05T23:58:01.325720",
+ "cpu_percent": 80.02,
+ "ram_percent": 72.3,
+ "ram_used_gb": 45.9870491027832,
+ "gpu_memory_used": 6898.0,
+ "relative_time": 0.11059904098510742
},
- {
- "timestamp": "2025-01-04T02:37:53.266071",
- "cpu_percent": 15.33,
- "ram_percent": 52.9,
- "ram_used_gb": 33.678314208984375,
- "gpu_memory_used": 3392.0,
- "relative_time": 1.1673684120178223
+ {
+ "timestamp": "2025-01-05T23:58:02.435394",
+ "cpu_percent": 35.49,
+ "ram_percent": 72.5,
+ "ram_used_gb": 46.1334114074707,
+ "gpu_memory_used": 6900.0,
+ "relative_time": 1.1894314289093018
},
- {
- "timestamp": "2025-01-04T02:37:54.352909",
- "cpu_percent": 15.3,
- "ram_percent": 53.0,
- "ram_used_gb": 33.712764739990234,
- "gpu_memory_used": 3667.0,
- "relative_time": 2.253591537475586
+ {
+ "timestamp": "2025-01-05T23:58:03.515145",
+ "cpu_percent": 24.51,
+ "ram_percent": 72.8,
+ "ram_used_gb": 46.30204772949219,
+ "gpu_memory_used": 6898.0,
+ "relative_time": 2.2583394050598145
},
- {
- "timestamp": "2025-01-04T02:37:55.439413",
- "cpu_percent": 23.51,
- "ram_percent": 52.7,
- "ram_used_gb": 33.49789810180664,
- "gpu_memory_used": 3662.0,
- "relative_time": 3.3292760848999023
+ {
+ "timestamp": "2025-01-05T23:58:04.583438",
+ "cpu_percent": 21.46,
+ "ram_percent": 72.9,
+ "ram_used_gb": 46.4000358581543,
+ "gpu_memory_used": 6898.0,
+ "relative_time": 3.3375651836395264
},
- {
- "timestamp": "2025-01-04T02:37:56.511211",
- "cpu_percent": 14.69,
- "ram_percent": 52.7,
- "ram_used_gb": 33.494102478027344,
- "gpu_memory_used": 3668.0,
- "relative_time": 4.397106885910034
+ {
+ "timestamp": "2025-01-05T23:58:05.662045",
+ "cpu_percent": 21.96,
+ "ram_percent": 73.0,
+ "ram_used_gb": 46.42799758911133,
+ "gpu_memory_used": 6898.0,
+ "relative_time": 4.432476043701172
},
- {
- "timestamp": "2025-01-04T02:37:57.582176",
- "cpu_percent": 11.01,
- "ram_percent": 52.8,
- "ram_used_gb": 33.564491271972656,
- "gpu_memory_used": 3665.0,
- "relative_time": 5.46670126914978
+ {
+ "timestamp": "2025-01-05T23:58:06.757941",
+ "cpu_percent": 22.5,
+ "ram_percent": 73.0,
+ "ram_used_gb": 46.43841552734375,
+ "gpu_memory_used": 6897.0,
+ "relative_time": 5.494191408157349
},
- {
- "timestamp": "2025-01-04T02:37:58.637969",
- "cpu_percent": 15.04,
- "ram_percent": 52.8,
- "ram_used_gb": 33.555362701416016,
- "gpu_memory_used": 3668.0,
- "relative_time": 6.523184061050415
+ {
+ "timestamp": "2025-01-05T23:58:07.820104",
+ "cpu_percent": 29.33,
+ "ram_percent": 73.2,
+ "ram_used_gb": 46.577056884765625,
+ "gpu_memory_used": 6898.0,
+ "relative_time": 6.57433009147644
},
- {
- "timestamp": "2025-01-04T02:37:59.700880",
- "cpu_percent": 13.32,
- "ram_percent": 52.8,
- "ram_used_gb": 33.559967041015625,
- "gpu_memory_used": 3668.0,
- "relative_time": 7.589032888412476
+ {
+ "timestamp": "2025-01-05T23:58:08.899575",
+ "cpu_percent": 44.12,
+ "ram_percent": 73.2,
+ "ram_used_gb": 46.559593200683594,
+ "gpu_memory_used": 6898.0,
+ "relative_time": 7.6514153480529785
},
- {
- "timestamp": "2025-01-04T02:38:00.773895",
- "cpu_percent": 12.45,
- "ram_percent": 52.8,
- "ram_used_gb": 33.609134674072266,
- "gpu_memory_used": 3667.0,
- "relative_time": 8.677486181259155
+ {
+ "timestamp": "2025-01-05T23:58:09.977111",
+ "cpu_percent": 52.3,
+ "ram_percent": 73.1,
+ "ram_used_gb": 46.498756408691406,
+ "gpu_memory_used": 6898.0,
+ "relative_time": 8.74302339553833
},
- {
- "timestamp": "2025-01-04T02:38:01.851195",
- "cpu_percent": 12.62,
- "ram_percent": 52.9,
- "ram_used_gb": 33.67635726928711,
- "gpu_memory_used": 3665.0,
- "relative_time": 9.734971046447754
+ {
+ "timestamp": "2025-01-05T23:58:11.067817",
+ "cpu_percent": 32.66,
+ "ram_percent": 73.2,
+ "ram_used_gb": 46.585052490234375,
+ "gpu_memory_used": 6889.0,
+ "relative_time": 9.832671403884888
},
- {
- "timestamp": "2025-01-04T02:38:02.907897",
- "cpu_percent": 20.61,
- "ram_percent": 53.0,
- "ram_used_gb": 33.72555160522461,
- "gpu_memory_used": 3660.0,
- "relative_time": 10.813292026519775
+ {
+ "timestamp": "2025-01-05T23:58:12.158255",
+ "cpu_percent": 28.38,
+ "ram_percent": 73.7,
+ "ram_used_gb": 46.87907791137695,
+ "gpu_memory_used": 6889.0,
+ "relative_time": 10.914559125900269
},
- {
- "timestamp": "2025-01-04T02:38:03.996322",
- "cpu_percent": 33.24,
- "ram_percent": 53.2,
- "ram_used_gb": 33.832088470458984,
- "gpu_memory_used": 3660.0,
- "relative_time": 11.917856454849243
+ {
+ "timestamp": "2025-01-05T23:58:13.239163",
+ "cpu_percent": 31.73,
+ "ram_percent": 73.8,
+ "ram_used_gb": 46.930152893066406,
+ "gpu_memory_used": 6893.0,
+ "relative_time": 11.998127222061157
},
- {
- "timestamp": "2025-01-04T02:38:05.101973",
- "cpu_percent": 14.24,
- "ram_percent": 53.0,
- "ram_used_gb": 33.7408447265625,
- "gpu_memory_used": 3662.0,
- "relative_time": 12.986546277999878
+ {
+ "timestamp": "2025-01-05T23:58:14.323142",
+ "cpu_percent": 31.32,
+ "ram_percent": 74.4,
+ "ram_used_gb": 47.331382751464844,
+ "gpu_memory_used": 6897.0,
+ "relative_time": 13.080781936645508
},
{
- "timestamp": "2025-01-04T02:38:06.162037",
- "cpu_percent": 14.38,
- "ram_percent": 53.1,
- "ram_used_gb": 33.774169921875,
- "gpu_memory_used": 3662.0,
- "relative_time": 14.062608242034912
+ "timestamp": "2025-01-05T23:58:15.407719",
+ "cpu_percent": 26.34,
+ "ram_percent": 74.4,
+ "ram_used_gb": 47.34866714477539,
+ "gpu_memory_used": 6897.0,
+ "relative_time": 14.168652534484863
},
{
- "timestamp": "2025-01-04T02:38:07.248210",
- "cpu_percent": 14.39,
- "ram_percent": 53.2,
- "ram_used_gb": 33.83738327026367,
- "gpu_memory_used": 4029.0,
- "relative_time": 15.156044960021973
+ "timestamp": "2025-01-05T23:58:16.493716",
+ "cpu_percent": 39.87,
+ "ram_percent": 74.4,
+ "ram_used_gb": 47.35265350341797,
+ "gpu_memory_used": 6897.0,
+ "relative_time": 15.266503810882568
},
{
- "timestamp": "2025-01-04T02:38:08.329582",
- "cpu_percent": 31.18,
- "ram_percent": 53.2,
- "ram_used_gb": 33.87126541137695,
- "gpu_memory_used": 4032.0,
- "relative_time": 16.249940395355225
+ "timestamp": "2025-01-05T23:58:17.592593",
+ "cpu_percent": 39.32,
+ "ram_percent": 74.5,
+ "ram_used_gb": 47.37355041503906,
+ "gpu_memory_used": 6898.0,
+ "relative_time": 16.375996112823486
},
{
- "timestamp": "2025-01-04T02:38:09.432992",
- "cpu_percent": 19.33,
- "ram_percent": 53.2,
- "ram_used_gb": 33.842403411865234,
- "gpu_memory_used": 4032.0,
- "relative_time": 17.331223011016846
+ "timestamp": "2025-01-05T23:58:18.701607",
+ "cpu_percent": 33.48,
+ "ram_percent": 74.4,
+ "ram_used_gb": 47.338523864746094,
+ "gpu_memory_used": 6894.0,
+ "relative_time": 17.455860376358032
},
{
- "timestamp": "2025-01-04T02:38:10.505101",
- "cpu_percent": 13.34,
- "ram_percent": 53.2,
- "ram_used_gb": 33.86738967895508,
- "gpu_memory_used": 4029.0,
- "relative_time": 18.390397548675537
+ "timestamp": "2025-01-05T23:58:19.781209",
+ "cpu_percent": 24.39,
+ "ram_percent": 75.7,
+ "ram_used_gb": 48.1730842590332,
+ "gpu_memory_used": 6894.0,
+ "relative_time": 18.53490376472473
},
{
- "timestamp": "2025-01-04T02:38:11.570033",
- "cpu_percent": 12.61,
- "ram_percent": 53.4,
- "ram_used_gb": 33.938289642333984,
- "gpu_memory_used": 4028.0,
- "relative_time": 19.477521181106567
+ "timestamp": "2025-01-05T23:58:20.860173",
+ "cpu_percent": 27.27,
+ "ram_percent": 75.6,
+ "ram_used_gb": 48.08787536621094,
+ "gpu_memory_used": 6894.0,
+ "relative_time": 19.615966081619263
},
{
- "timestamp": "2025-01-04T02:38:12.663780",
- "cpu_percent": 15.78,
- "ram_percent": 53.4,
- "ram_used_gb": 33.969398498535156,
- "gpu_memory_used": 4030.0,
- "relative_time": 20.57425808906555
+ "timestamp": "2025-01-05T23:58:21.942004",
+ "cpu_percent": 26.82,
+ "ram_percent": 75.8,
+ "ram_used_gb": 48.20832443237305,
+ "gpu_memory_used": 6892.0,
+ "relative_time": 20.724673748016357
},
{
- "timestamp": "2025-01-04T02:38:13.750065",
- "cpu_percent": 18.69,
- "ram_percent": 53.5,
- "ram_used_gb": 34.03954315185547,
- "gpu_memory_used": 4021.0,
- "relative_time": 21.652076244354248
+ "timestamp": "2025-01-05T23:58:23.050088",
+ "cpu_percent": 46.01,
+ "ram_percent": 75.8,
+ "ram_used_gb": 48.23963928222656,
+ "gpu_memory_used": 6887.0,
+ "relative_time": 21.814561367034912
},
{
- "timestamp": "2025-01-04T02:38:14.825318",
- "cpu_percent": 10.48,
- "ram_percent": 53.6,
- "ram_used_gb": 34.07048416137695,
- "gpu_memory_used": 4025.0,
- "relative_time": 22.73010230064392
+ "timestamp": "2025-01-05T23:58:24.139949",
+ "cpu_percent": 27.91,
+ "ram_percent": 75.9,
+ "ram_used_gb": 48.260440826416016,
+ "gpu_memory_used": 6894.0,
+ "relative_time": 22.900237798690796
},
{
- "timestamp": "2025-01-04T02:38:15.912340",
- "cpu_percent": 12.53,
- "ram_percent": 53.6,
- "ram_used_gb": 34.09389877319336,
- "gpu_memory_used": 4026.0,
- "relative_time": 23.81609869003296
+ "timestamp": "2025-01-05T23:58:25.226167",
+ "cpu_percent": 20.4,
+ "ram_percent": 76.4,
+ "ram_used_gb": 48.5912971496582,
+ "gpu_memory_used": 6894.0,
+ "relative_time": 24.029305934906006
},
{
- "timestamp": "2025-01-04T02:38:17.003329",
- "cpu_percent": 16.09,
- "ram_percent": 53.7,
- "ram_used_gb": 34.1781120300293,
- "gpu_memory_used": 4025.0,
- "relative_time": 24.90904140472412
+ "timestamp": "2025-01-05T23:58:26.354518",
+ "cpu_percent": 26.17,
+ "ram_percent": 76.5,
+ "ram_used_gb": 48.63065719604492,
+ "gpu_memory_used": 6896.0,
+ "relative_time": 25.113131046295166
},
{
- "timestamp": "2025-01-04T02:38:18.079837",
- "cpu_percent": 14.98,
- "ram_percent": 53.8,
- "ram_used_gb": 34.21260070800781,
- "gpu_memory_used": 4025.0,
- "relative_time": 25.986279249191284
+ "timestamp": "2025-01-05T23:58:27.438557",
+ "cpu_percent": 19.84,
+ "ram_percent": 76.5,
+ "ram_used_gb": 48.630950927734375,
+ "gpu_memory_used": 6896.0,
+ "relative_time": 26.198577404022217
},
{
- "timestamp": "2025-01-04T02:38:19.167635",
- "cpu_percent": 14.85,
- "ram_percent": 53.8,
- "ram_used_gb": 34.23923873901367,
- "gpu_memory_used": 4024.0,
- "relative_time": 27.076823234558105
+ "timestamp": "2025-01-05T23:58:28.523869",
+ "cpu_percent": 25.87,
+ "ram_percent": 76.4,
+ "ram_used_gb": 48.61442565917969,
+ "gpu_memory_used": 6896.0,
+ "relative_time": 27.290891647338867
},
{
- "timestamp": "2025-01-04T02:38:20.258141",
- "cpu_percent": 15.05,
- "ram_percent": 53.9,
- "ram_used_gb": 34.26483917236328,
- "gpu_memory_used": 4015.0,
- "relative_time": 28.144607067108154
+ "timestamp": "2025-01-05T23:58:29.616289",
+ "cpu_percent": 26.29,
+ "ram_percent": 78.0,
+ "ram_used_gb": 49.63731384277344,
+ "gpu_memory_used": 6896.0,
+ "relative_time": 28.37503457069397
},
{
- "timestamp": "2025-01-04T02:38:21.315694",
- "cpu_percent": 17.08,
- "ram_percent": 53.9,
- "ram_used_gb": 34.31473922729492,
- "gpu_memory_used": 4016.0,
- "relative_time": 29.20189356803894
+ "timestamp": "2025-01-05T23:58:30.702453",
+ "cpu_percent": 31.57,
+ "ram_percent": 79.0,
+ "ram_used_gb": 50.24030303955078,
+ "gpu_memory_used": 6896.0,
+ "relative_time": 29.482722520828247
},
{
- "timestamp": "2025-01-04T02:38:22.388259",
- "cpu_percent": 17.47,
- "ram_percent": 54.0,
- "ram_used_gb": 34.35490798950195,
- "gpu_memory_used": 4016.0,
- "relative_time": 30.28918957710266
+ "timestamp": "2025-01-05T23:58:31.807837",
+ "cpu_percent": 43.3,
+ "ram_percent": 78.9,
+ "ram_used_gb": 50.18223190307617,
+ "gpu_memory_used": 6897.0,
+ "relative_time": 30.574514150619507
},
{
- "timestamp": "2025-01-04T02:38:23.463469",
- "cpu_percent": 15.76,
- "ram_percent": 54.0,
- "ram_used_gb": 34.33717346191406,
- "gpu_memory_used": 4002.0,
- "relative_time": 31.364880561828613
+ "timestamp": "2025-01-05T23:58:32.900169",
+ "cpu_percent": 31.68,
+ "ram_percent": 78.7,
+ "ram_used_gb": 50.09246063232422,
+ "gpu_memory_used": 6898.0,
+ "relative_time": 31.645864486694336
},
{
- "timestamp": "2025-01-04T02:38:24.540334",
- "cpu_percent": 13.54,
- "ram_percent": 54.1,
- "ram_used_gb": 34.38197708129883,
- "gpu_memory_used": 3999.0,
- "relative_time": 32.4253191947937
+ "timestamp": "2025-01-05T23:58:33.970378",
+ "cpu_percent": 33.76,
+ "ram_percent": 78.7,
+ "ram_used_gb": 50.055450439453125,
+ "gpu_memory_used": 6899.0,
+ "relative_time": 32.75911498069763
},
{
- "timestamp": "2025-01-04T02:38:25.597934",
- "cpu_percent": 13.99,
- "ram_percent": 54.2,
- "ram_used_gb": 34.48365783691406,
- "gpu_memory_used": 4004.0,
- "relative_time": 33.50029754638672
+ "timestamp": "2025-01-05T23:58:35.085855",
+ "cpu_percent": 30.84,
+ "ram_percent": 78.6,
+ "ram_used_gb": 49.99536895751953,
+ "gpu_memory_used": 6900.0,
+ "relative_time": 33.86884117126465
},
{
- "timestamp": "2025-01-04T02:38:26.673108",
- "cpu_percent": 15.16,
- "ram_percent": 54.2,
- "ram_used_gb": 34.50083923339844,
- "gpu_memory_used": 4011.0,
- "relative_time": 34.5756139755249
+ "timestamp": "2025-01-05T23:58:36.197184",
+ "cpu_percent": 72.82,
+ "ram_percent": 78.7,
+ "ram_used_gb": 50.043052673339844,
+ "gpu_memory_used": 6905.0,
+ "relative_time": 35.01772928237915
},
{
- "timestamp": "2025-01-04T02:38:27.748147",
- "cpu_percent": 17.68,
- "ram_percent": 54.2,
- "ram_used_gb": 34.49884033203125,
- "gpu_memory_used": 4016.0,
- "relative_time": 35.650988817214966
+ "timestamp": "2025-01-05T23:58:37.343692",
+ "cpu_percent": 88.62,
+ "ram_percent": 78.6,
+ "ram_used_gb": 50.025630950927734,
+ "gpu_memory_used": 6905.0,
+ "relative_time": 36.140772104263306
},
{
- "timestamp": "2025-01-04T02:38:28.835603",
- "cpu_percent": 26.81,
- "ram_percent": 54.3,
- "ram_used_gb": 34.536773681640625,
- "gpu_memory_used": 4015.0,
- "relative_time": 36.73981595039368
+ "timestamp": "2025-01-05T23:58:38.465432",
+ "cpu_percent": 83.04,
+ "ram_percent": 78.6,
+ "ram_used_gb": 49.980735778808594,
+ "gpu_memory_used": 6905.0,
+ "relative_time": 37.219876527786255
},
{
- "timestamp": "2025-01-04T02:38:29.912604",
- "cpu_percent": 27.61,
- "ram_percent": 54.3,
- "ram_used_gb": 34.56916427612305,
- "gpu_memory_used": 4016.0,
- "relative_time": 37.81279993057251
+ "timestamp": "2025-01-05T23:58:39.545718",
+ "cpu_percent": 27.47,
+ "ram_percent": 77.6,
+ "ram_used_gb": 49.38398742675781,
+ "gpu_memory_used": 6905.0,
+ "relative_time": 38.304253339767456
},
{
- "timestamp": "2025-01-04T02:38:30.984988",
- "cpu_percent": 34.24,
- "ram_percent": 54.4,
- "ram_used_gb": 34.599365234375,
- "gpu_memory_used": 4014.0,
- "relative_time": 38.89973425865173
+ "timestamp": "2025-01-05T23:58:40.629875",
+ "cpu_percent": 27.94,
+ "ram_percent": 77.5,
+ "ram_used_gb": 49.291404724121094,
+ "gpu_memory_used": 6903.0,
+ "relative_time": 39.388391971588135
},
{
- "timestamp": "2025-01-04T02:38:32.071596",
- "cpu_percent": 31.95,
- "ram_percent": 54.2,
- "ram_used_gb": 34.46506881713867,
- "gpu_memory_used": 4014.0,
- "relative_time": 39.95902729034424
+ "timestamp": "2025-01-05T23:58:41.713515",
+ "cpu_percent": 40.52,
+ "ram_percent": 77.5,
+ "ram_used_gb": 49.300716400146484,
+ "gpu_memory_used": 6901.0,
+ "relative_time": 40.46565055847168
},
{
- "timestamp": "2025-01-04T02:38:33.140836",
- "cpu_percent": 27.78,
- "ram_percent": 54.3,
- "ram_used_gb": 34.51242446899414,
- "gpu_memory_used": 4014.0,
- "relative_time": 41.0454580783844
+ "timestamp": "2025-01-05T23:58:42.792104",
+ "cpu_percent": 33.71,
+ "ram_percent": 77.5,
+ "ram_used_gb": 49.276004791259766,
+ "gpu_memory_used": 6906.0,
+ "relative_time": 41.568103313446045
},
{
- "timestamp": "2025-01-04T02:38:34.229919",
- "cpu_percent": 21.09,
- "ram_percent": 54.3,
- "ram_used_gb": 34.513973236083984,
- "gpu_memory_used": 4011.0,
- "relative_time": 42.133435010910034
+ "timestamp": "2025-01-05T23:58:43.893473",
+ "cpu_percent": 41.05,
+ "ram_percent": 77.5,
+ "ram_used_gb": 49.27006912231445,
+ "gpu_memory_used": 6906.0,
+ "relative_time": 42.63854742050171
},
{
- "timestamp": "2025-01-04T02:38:35.317486",
- "cpu_percent": 17.26,
- "ram_percent": 53.9,
- "ram_used_gb": 34.3167839050293,
- "gpu_memory_used": 4020.0,
- "relative_time": 43.21739077568054
+ "timestamp": "2025-01-05T23:58:44.963495",
+ "cpu_percent": 36.05,
+ "ram_percent": 77.5,
+ "ram_used_gb": 49.29539108276367,
+ "gpu_memory_used": 6907.0,
+ "relative_time": 43.74750852584839
},
{
- "timestamp": "2025-01-04T02:38:36.394375",
- "cpu_percent": 12.32,
- "ram_percent": 54.0,
- "ram_used_gb": 34.34043884277344,
- "gpu_memory_used": 4020.0,
- "relative_time": 44.27889919281006
+ "timestamp": "2025-01-05T23:58:46.072484",
+ "cpu_percent": 33.63,
+ "ram_percent": 77.5,
+ "ram_used_gb": 49.28715896606445,
+ "gpu_memory_used": 6907.0,
+ "relative_time": 44.81742191314697
},
{
- "timestamp": "2025-01-04T02:38:37.454005",
- "cpu_percent": 12.46,
- "ram_percent": 54.0,
- "ram_used_gb": 34.37453842163086,
- "gpu_memory_used": 4020.0,
- "relative_time": 45.341508626937866
+ "timestamp": "2025-01-05T23:58:47.143039",
+ "cpu_percent": 38.61,
+ "ram_percent": 77.5,
+ "ram_used_gb": 49.28096008300781,
+ "gpu_memory_used": 6906.0,
+ "relative_time": 45.88616967201233
},
{
- "timestamp": "2025-01-04T02:38:38.515337",
- "cpu_percent": 14.16,
- "ram_percent": 54.1,
- "ram_used_gb": 34.401729583740234,
- "gpu_memory_used": 4019.0,
- "relative_time": 46.410696506500244
+ "timestamp": "2025-01-05T23:58:48.211669",
+ "cpu_percent": 34.67,
+ "ram_percent": 77.5,
+ "ram_used_gb": 49.27309799194336,
+ "gpu_memory_used": 6906.0,
+ "relative_time": 46.96554446220398
},
{
- "timestamp": "2025-01-04T02:38:39.593044",
- "cpu_percent": 13.71,
- "ram_percent": 54.1,
- "ram_used_gb": 34.435630798339844,
- "gpu_memory_used": 4019.0,
- "relative_time": 47.48556661605835
+ "timestamp": "2025-01-05T23:58:49.290049",
+ "cpu_percent": 41.6,
+ "ram_percent": 77.4,
+ "ram_used_gb": 49.246002197265625,
+ "gpu_memory_used": 6906.0,
+ "relative_time": 48.04342079162598
},
{
- "timestamp": "2025-01-04T02:38:40.665509",
- "cpu_percent": 13.17,
- "ram_percent": 54.2,
- "ram_used_gb": 34.49795150756836,
- "gpu_memory_used": 4016.0,
- "relative_time": 48.551952838897705
+ "timestamp": "2025-01-05T23:58:50.368506",
+ "cpu_percent": 36.04,
+ "ram_percent": 77.4,
+ "ram_used_gb": 49.25265121459961,
+ "gpu_memory_used": 6906.0,
+ "relative_time": 49.106462717056274
},
{
- "timestamp": "2025-01-04T02:38:41.724929",
- "cpu_percent": 12.67,
- "ram_percent": 54.3,
- "ram_used_gb": 34.52568054199219,
- "gpu_memory_used": 4011.0,
- "relative_time": 49.61591196060181
+ "timestamp": "2025-01-05T23:58:51.431363",
+ "cpu_percent": 35.67,
+ "ram_percent": 77.6,
+ "ram_used_gb": 49.333187103271484,
+ "gpu_memory_used": 6901.0,
+ "relative_time": 50.20661163330078
},
{
- "timestamp": "2025-01-04T02:38:42.801080",
- "cpu_percent": 12.83,
- "ram_percent": 54.4,
- "ram_used_gb": 34.579071044921875,
- "gpu_memory_used": 4007.0,
- "relative_time": 50.70357823371887
+ "timestamp": "2025-01-05T23:58:52.531456",
+ "cpu_percent": 44.21,
+ "ram_percent": 77.5,
+ "ram_used_gb": 49.31691360473633,
+ "gpu_memory_used": 6902.0,
+ "relative_time": 51.29085111618042
},
{
- "timestamp": "2025-01-04T02:38:43.884984",
- "cpu_percent": 12.31,
- "ram_percent": 54.4,
- "ram_used_gb": 34.59829330444336,
- "gpu_memory_used": 4003.0,
- "relative_time": 51.771891832351685
+ "timestamp": "2025-01-05T23:58:53.616613",
+ "cpu_percent": 31.69,
+ "ram_percent": 77.5,
+ "ram_used_gb": 49.31277847290039,
+ "gpu_memory_used": 6902.0,
+ "relative_time": 52.36105275154114
},
{
- "timestamp": "2025-01-04T02:38:44.957477",
- "cpu_percent": 12.58,
- "ram_percent": 54.7,
- "ram_used_gb": 34.76633071899414,
- "gpu_memory_used": 4003.0,
- "relative_time": 52.859192848205566
+ "timestamp": "2025-01-05T23:58:54.686388",
+ "cpu_percent": 28.1,
+ "ram_percent": 77.6,
+ "ram_used_gb": 49.33551025390625,
+ "gpu_memory_used": 6902.0,
+ "relative_time": 53.44114351272583
},
{
- "timestamp": "2025-01-04T02:38:46.031581",
- "cpu_percent": 14.48,
- "ram_percent": 54.6,
- "ram_used_gb": 34.76308059692383,
- "gpu_memory_used": 4013.0,
- "relative_time": 53.91648840904236
+ "timestamp": "2025-01-05T23:58:55.766386",
+ "cpu_percent": 47.14,
+ "ram_percent": 77.6,
+ "ram_used_gb": 49.34830856323242,
+ "gpu_memory_used": 6902.0,
+ "relative_time": 54.51214838027954
},
{
- "timestamp": "2025-01-04T02:38:47.091693",
- "cpu_percent": 14.35,
- "ram_percent": 54.7,
- "ram_used_gb": 34.81193923950195,
- "gpu_memory_used": 4013.0,
- "relative_time": 54.993882179260254
+ "timestamp": "2025-01-05T23:58:56.837399",
+ "cpu_percent": 39.78,
+ "ram_percent": 77.6,
+ "ram_used_gb": 49.3748664855957,
+ "gpu_memory_used": 6902.0,
+ "relative_time": 55.71548247337341
},
{
- "timestamp": "2025-01-04T02:38:48.178826",
- "cpu_percent": 16.46,
- "ram_percent": 54.7,
- "ram_used_gb": 34.784278869628906,
- "gpu_memory_used": 4014.0,
- "relative_time": 56.064146518707275
+ "timestamp": "2025-01-05T23:58:58.041708",
+ "cpu_percent": 67.91,
+ "ram_percent": 77.6,
+ "ram_used_gb": 49.35768127441406,
+ "gpu_memory_used": 6908.0,
+ "relative_time": 56.83315873146057
},
{
- "timestamp": "2025-01-04T02:38:49.235997",
- "cpu_percent": 12.84,
- "ram_percent": 54.7,
- "ram_used_gb": 34.79767608642578,
- "gpu_memory_used": 4014.0,
- "relative_time": 57.12374472618103
+ "timestamp": "2025-01-05T23:58:59.158028",
+ "cpu_percent": 55.69,
+ "ram_percent": 77.6,
+ "ram_used_gb": 49.38890838623047,
+ "gpu_memory_used": 6920.0,
+ "relative_time": 57.956458568573
},
{
- "timestamp": "2025-01-04T02:38:50.295962",
- "cpu_percent": 15.69,
- "ram_percent": 54.8,
- "ram_used_gb": 34.8546257019043,
- "gpu_memory_used": 4013.0,
- "relative_time": 58.180296421051025
+ "timestamp": "2025-01-05T23:59:00.281669",
+ "cpu_percent": 48.11,
+ "ram_percent": 77.6,
+ "ram_used_gb": 49.3576545715332,
+ "gpu_memory_used": 6921.0,
+ "relative_time": 59.03302216529846
},
{
- "timestamp": "2025-01-04T02:38:51.357678",
- "cpu_percent": 14.54,
- "ram_percent": 54.8,
- "ram_used_gb": 34.8900260925293,
- "gpu_memory_used": 4014.0,
- "relative_time": 59.242270708084106
- },
- {
- "timestamp": "2025-01-04T02:38:52.415380",
- "cpu_percent": 14.74,
- "ram_percent": 54.9,
- "ram_used_gb": 34.92173767089844,
- "gpu_memory_used": 4014.0,
- "relative_time": 60.307114601135254
- },
- {
- "timestamp": "2025-01-04T02:38:53.490598",
- "cpu_percent": 13.82,
- "ram_percent": 55.1,
- "ram_used_gb": 35.028907775878906,
- "gpu_memory_used": 4014.0,
- "relative_time": 61.37576389312744
- },
- {
- "timestamp": "2025-01-04T02:38:54.548660",
- "cpu_percent": 11.31,
- "ram_percent": 55.1,
- "ram_used_gb": 35.05375289916992,
- "gpu_memory_used": 4011.0,
- "relative_time": 62.43392610549927
- },
- {
- "timestamp": "2025-01-04T02:38:55.609900",
- "cpu_percent": 14.35,
- "ram_percent": 55.1,
- "ram_used_gb": 35.03831100463867,
- "gpu_memory_used": 4011.0,
- "relative_time": 63.493370056152344
- },
- {
- "timestamp": "2025-01-04T02:38:56.666032",
- "cpu_percent": 13.11,
- "ram_percent": 55.1,
- "ram_used_gb": 35.07795333862305,
- "gpu_memory_used": 4011.0,
- "relative_time": 64.54955720901489
- },
- {
- "timestamp": "2025-01-04T02:38:57.730782",
- "cpu_percent": 16.01,
- "ram_percent": 55.2,
- "ram_used_gb": 35.11598587036133,
- "gpu_memory_used": 4007.0,
- "relative_time": 65.61445665359497
- },
- {
- "timestamp": "2025-01-04T02:38:58.787051",
- "cpu_percent": 13.68,
- "ram_percent": 55.3,
- "ram_used_gb": 35.15185546875,
- "gpu_memory_used": 4007.0,
- "relative_time": 66.67095923423767
- },
- {
- "timestamp": "2025-01-04T02:38:59.847606",
- "cpu_percent": 13.08,
- "ram_percent": 55.3,
- "ram_used_gb": 35.183753967285156,
- "gpu_memory_used": 4003.0,
- "relative_time": 67.73307466506958
- },
- {
- "timestamp": "2025-01-04T02:39:00.918871",
- "cpu_percent": 13.26,
- "ram_percent": 55.4,
- "ram_used_gb": 35.22275161743164,
- "gpu_memory_used": 4005.0,
- "relative_time": 68.80590057373047
- },
- {
- "timestamp": "2025-01-04T02:39:01.991170",
- "cpu_percent": 11.05,
- "ram_percent": 55.3,
- "ram_used_gb": 35.15507125854492,
- "gpu_memory_used": 4005.0,
- "relative_time": 69.8813705444336
- },
- {
- "timestamp": "2025-01-04T02:39:03.058942",
- "cpu_percent": 11.08,
- "ram_percent": 55.4,
- "ram_used_gb": 35.2095947265625,
- "gpu_memory_used": 4005.0,
- "relative_time": 70.94484400749207
- },
- {
- "timestamp": "2025-01-04T02:39:04.116159",
- "cpu_percent": 12.74,
- "ram_percent": 55.4,
- "ram_used_gb": 35.24392318725586,
- "gpu_memory_used": 4005.0,
- "relative_time": 72.00281810760498
- },
- {
- "timestamp": "2025-01-04T02:39:05.187410",
- "cpu_percent": 11.04,
- "ram_percent": 55.4,
- "ram_used_gb": 35.259830474853516,
- "gpu_memory_used": 4005.0,
- "relative_time": 73.07217526435852
- },
- {
- "timestamp": "2025-01-04T02:39:06.248588",
- "cpu_percent": 13.66,
- "ram_percent": 55.5,
- "ram_used_gb": 35.29854965209961,
- "gpu_memory_used": 4005.0,
- "relative_time": 74.13533973693848
- },
- {
- "timestamp": "2025-01-04T02:39:07.321179",
- "cpu_percent": 11.16,
- "ram_percent": 55.6,
- "ram_used_gb": 35.346981048583984,
- "gpu_memory_used": 4005.0,
- "relative_time": 75.18772435188293
- },
- {
- "timestamp": "2025-01-04T02:39:08.362496",
- "cpu_percent": 9.46,
- "ram_percent": 56.1,
- "ram_used_gb": 35.69393539428711,
- "gpu_memory_used": 4006.0,
- "relative_time": 76.25136637687683
- },
- {
- "timestamp": "2025-01-04T02:39:09.438218",
- "cpu_percent": 15.88,
- "ram_percent": 56.1,
- "ram_used_gb": 35.6658821105957,
- "gpu_memory_used": 4006.0,
- "relative_time": 77.32329249382019
- },
- {
- "timestamp": "2025-01-04T02:39:10.495653",
- "cpu_percent": 13.25,
- "ram_percent": 56.1,
- "ram_used_gb": 35.658119201660156,
- "gpu_memory_used": 4014.0,
- "relative_time": 78.38210940361023
- },
- {
- "timestamp": "2025-01-04T02:39:11.560094",
- "cpu_percent": 10.8,
- "ram_percent": 56.1,
- "ram_used_gb": 35.694610595703125,
- "gpu_memory_used": 4014.0,
- "relative_time": 79.44816374778748
- },
- {
- "timestamp": "2025-01-04T02:39:12.621879",
- "cpu_percent": 12.58,
- "ram_percent": 56.1,
- "ram_used_gb": 35.68545150756836,
- "gpu_memory_used": 4014.0,
- "relative_time": 80.51017951965332
- },
- {
- "timestamp": "2025-01-04T02:39:13.692834",
- "cpu_percent": 13.3,
- "ram_percent": 56.2,
- "ram_used_gb": 35.730979919433594,
- "gpu_memory_used": 4012.0,
- "relative_time": 81.57789969444275
- },
- {
- "timestamp": "2025-01-04T02:39:14.753401",
- "cpu_percent": 14.75,
- "ram_percent": 56.2,
- "ram_used_gb": 35.73103332519531,
- "gpu_memory_used": 4012.0,
- "relative_time": 82.63830900192261
- },
- {
- "timestamp": "2025-01-04T02:39:15.811385",
- "cpu_percent": 14.69,
- "ram_percent": 56.2,
- "ram_used_gb": 35.740108489990234,
- "gpu_memory_used": 4011.0,
- "relative_time": 83.69796371459961
- },
- {
- "timestamp": "2025-01-04T02:39:16.874197",
- "cpu_percent": 14.28,
- "ram_percent": 56.2,
- "ram_used_gb": 35.767982482910156,
- "gpu_memory_used": 4010.0,
- "relative_time": 84.76145887374878
- },
- {
- "timestamp": "2025-01-04T02:39:17.936944",
- "cpu_percent": 12.99,
- "ram_percent": 56.3,
- "ram_used_gb": 35.81233596801758,
- "gpu_memory_used": 4010.0,
- "relative_time": 85.84119439125061
- },
- {
- "timestamp": "2025-01-04T02:39:19.027320",
- "cpu_percent": 12.79,
- "ram_percent": 56.6,
- "ram_used_gb": 36.0085563659668,
- "gpu_memory_used": 4010.0,
- "relative_time": 86.91442775726318
- },
- {
- "timestamp": "2025-01-04T02:39:20.088804",
- "cpu_percent": 15.62,
- "ram_percent": 56.9,
- "ram_used_gb": 36.16616439819336,
- "gpu_memory_used": 4006.0,
- "relative_time": 88.00494360923767
- },
- {
- "timestamp": "2025-01-04T02:39:21.191796",
- "cpu_percent": 12.98,
- "ram_percent": 57.1,
- "ram_used_gb": 36.3217658996582,
- "gpu_memory_used": 4005.0,
- "relative_time": 89.12522411346436
- },
- {
- "timestamp": "2025-01-04T02:39:22.311508",
- "cpu_percent": 14.77,
- "ram_percent": 56.9,
- "ram_used_gb": 36.191429138183594,
- "gpu_memory_used": 4005.0,
- "relative_time": 90.19932198524475
- },
- {
- "timestamp": "2025-01-04T02:39:23.372871",
- "cpu_percent": 12.98,
- "ram_percent": 57.1,
- "ram_used_gb": 36.29658889770508,
- "gpu_memory_used": 4005.0,
- "relative_time": 91.3045928478241
- },
- {
- "timestamp": "2025-01-04T02:39:24.479087",
- "cpu_percent": 14.64,
- "ram_percent": 57.1,
- "ram_used_gb": 36.30413055419922,
- "gpu_memory_used": 3998.0,
- "relative_time": 92.36360597610474
- },
- {
- "timestamp": "2025-01-04T02:39:25.537969",
- "cpu_percent": 14.51,
- "ram_percent": 57.1,
- "ram_used_gb": 36.311763763427734,
- "gpu_memory_used": 3998.0,
- "relative_time": 93.42230415344238
- },
- {
- "timestamp": "2025-01-04T02:39:26.594967",
- "cpu_percent": 13.05,
- "ram_percent": 57.1,
- "ram_used_gb": 36.351402282714844,
- "gpu_memory_used": 3998.0,
- "relative_time": 94.47847175598145
- },
- {
- "timestamp": "2025-01-04T02:39:27.652223",
- "cpu_percent": 15.05,
- "ram_percent": 57.2,
- "ram_used_gb": 36.36949157714844,
- "gpu_memory_used": 4004.0,
- "relative_time": 95.53560948371887
- },
- {
- "timestamp": "2025-01-04T02:39:28.708008",
- "cpu_percent": 12.51,
- "ram_percent": 57.2,
- "ram_used_gb": 36.3841667175293,
- "gpu_memory_used": 4004.0,
- "relative_time": 96.59472155570984
- },
- {
- "timestamp": "2025-01-04T02:39:29.768866",
- "cpu_percent": 10.83,
- "ram_percent": 57.2,
- "ram_used_gb": 36.39939880371094,
- "gpu_memory_used": 4004.0,
- "relative_time": 97.6679356098175
- },
- {
- "timestamp": "2025-01-04T02:39:30.844295",
- "cpu_percent": 14.61,
- "ram_percent": 57.3,
- "ram_used_gb": 36.42519760131836,
- "gpu_memory_used": 4004.0,
- "relative_time": 98.74996089935303
- },
- {
- "timestamp": "2025-01-04T02:39:31.934080",
- "cpu_percent": 11.74,
- "ram_percent": 57.0,
- "ram_used_gb": 36.271087646484375,
- "gpu_memory_used": 4004.0,
- "relative_time": 99.81860518455505
- },
- {
- "timestamp": "2025-01-04T02:39:32.989954",
- "cpu_percent": 12.09,
- "ram_percent": 57.2,
- "ram_used_gb": 36.368350982666016,
- "gpu_memory_used": 4010.0,
- "relative_time": 100.87712931632996
- },
- {
- "timestamp": "2025-01-04T02:39:34.061411",
- "cpu_percent": 11.07,
- "ram_percent": 57.2,
- "ram_used_gb": 36.38072967529297,
- "gpu_memory_used": 4010.0,
- "relative_time": 101.946035861969
- },
- {
- "timestamp": "2025-01-04T02:39:35.117182",
- "cpu_percent": 9.32,
- "ram_percent": 57.2,
- "ram_used_gb": 36.367733001708984,
- "gpu_memory_used": 4415.0,
- "relative_time": 103.00355505943298
- },
- {
- "timestamp": "2025-01-04T02:39:36.179256",
- "cpu_percent": 12.93,
- "ram_percent": 57.2,
- "ram_used_gb": 36.396636962890625,
- "gpu_memory_used": 4417.0,
- "relative_time": 104.06347131729126
- },
- {
- "timestamp": "2025-01-04T02:39:37.237454",
- "cpu_percent": 10.94,
- "ram_percent": 57.3,
- "ram_used_gb": 36.429630279541016,
- "gpu_memory_used": 4417.0,
- "relative_time": 105.12580728530884
- },
- {
- "timestamp": "2025-01-04T02:39:38.310321",
- "cpu_percent": 12.86,
- "ram_percent": 57.3,
- "ram_used_gb": 36.44291305541992,
- "gpu_memory_used": 4418.0,
- "relative_time": 106.17753839492798
- },
- {
- "timestamp": "2025-01-04T02:39:39.355358",
- "cpu_percent": 10.82,
- "ram_percent": 57.3,
- "ram_used_gb": 36.46603012084961,
- "gpu_memory_used": 4418.0,
- "relative_time": 107.24251008033752
- },
- {
- "timestamp": "2025-01-04T02:39:40.413524",
- "cpu_percent": 14.64,
- "ram_percent": 57.4,
- "ram_used_gb": 36.507179260253906,
- "gpu_memory_used": 4418.0,
- "relative_time": 108.29774165153503
- },
- {
- "timestamp": "2025-01-04T02:39:41.482368",
- "cpu_percent": 10.03,
- "ram_percent": 58.1,
- "ram_used_gb": 36.93812942504883,
- "gpu_memory_used": 4418.0,
- "relative_time": 109.36836910247803
- },
- {
- "timestamp": "2025-01-04T02:39:42.546204",
- "cpu_percent": 12.63,
- "ram_percent": 58.0,
- "ram_used_gb": 36.87542724609375,
- "gpu_memory_used": 4418.0,
- "relative_time": 110.43055510520935
- },
- {
- "timestamp": "2025-01-04T02:39:43.604666",
- "cpu_percent": 14.14,
- "ram_percent": 58.0,
- "ram_used_gb": 36.875328063964844,
- "gpu_memory_used": 4426.0,
- "relative_time": 111.49229407310486
- },
- {
- "timestamp": "2025-01-04T02:39:44.664973",
- "cpu_percent": 10.64,
- "ram_percent": 58.0,
- "ram_used_gb": 36.88217544555664,
- "gpu_memory_used": 4425.0,
- "relative_time": 112.55481696128845
- },
- {
- "timestamp": "2025-01-04T02:39:45.741305",
- "cpu_percent": 13.92,
- "ram_percent": 57.9,
- "ram_used_gb": 36.85449981689453,
- "gpu_memory_used": 4425.0,
- "relative_time": 113.62504053115845
- },
- {
- "timestamp": "2025-01-04T02:39:46.799656",
- "cpu_percent": 13.15,
- "ram_percent": 58.0,
- "ram_used_gb": 36.870826721191406,
- "gpu_memory_used": 4423.0,
- "relative_time": 114.6845052242279
- },
- {
- "timestamp": "2025-01-04T02:39:47.859725",
- "cpu_percent": 13.85,
- "ram_percent": 58.0,
- "ram_used_gb": 36.870948791503906,
- "gpu_memory_used": 4423.0,
- "relative_time": 115.74664235115051
- },
- {
- "timestamp": "2025-01-04T02:39:48.919071",
- "cpu_percent": 14.59,
- "ram_percent": 58.0,
- "ram_used_gb": 36.886802673339844,
- "gpu_memory_used": 4422.0,
- "relative_time": 116.80267906188965
- },
- {
- "timestamp": "2025-01-04T02:39:49.976541",
- "cpu_percent": 16.56,
- "ram_percent": 58.0,
- "ram_used_gb": 36.90068435668945,
- "gpu_memory_used": 4422.0,
- "relative_time": 117.86520886421204
- },
- {
- "timestamp": "2025-01-04T02:39:51.036593",
- "cpu_percent": 10.33,
- "ram_percent": 58.1,
- "ram_used_gb": 36.96821212768555,
- "gpu_memory_used": 4416.0,
- "relative_time": 118.92232513427734
- },
- {
- "timestamp": "2025-01-04T02:39:52.098186",
- "cpu_percent": 15.23,
- "ram_percent": 58.1,
- "ram_used_gb": 36.96358108520508,
- "gpu_memory_used": 4416.0,
- "relative_time": 119.98378920555115
- },
- {
- "timestamp": "2025-01-04T02:39:53.168566",
- "cpu_percent": 11.96,
- "ram_percent": 58.2,
- "ram_used_gb": 37.00669479370117,
- "gpu_memory_used": 4416.0,
- "relative_time": 121.05223441123962
- },
- {
- "timestamp": "2025-01-04T02:39:54.230292",
- "cpu_percent": 13.03,
- "ram_percent": 58.2,
- "ram_used_gb": 37.031307220458984,
- "gpu_memory_used": 4416.0,
- "relative_time": 122.11563086509705
- },
- {
- "timestamp": "2025-01-04T02:39:55.287670",
- "cpu_percent": 14.93,
- "ram_percent": 58.2,
- "ram_used_gb": 37.045589447021484,
- "gpu_memory_used": 4416.0,
- "relative_time": 123.17237305641174
- },
- {
- "timestamp": "2025-01-04T02:39:56.349816",
- "cpu_percent": 12.91,
- "ram_percent": 58.3,
- "ram_used_gb": 37.072689056396484,
- "gpu_memory_used": 4416.0,
- "relative_time": 124.23265671730042
- },
- {
- "timestamp": "2025-01-04T02:39:57.409384",
- "cpu_percent": 11.35,
- "ram_percent": 58.3,
- "ram_used_gb": 37.10455322265625,
- "gpu_memory_used": 4416.0,
- "relative_time": 125.29221749305725
- },
- {
- "timestamp": "2025-01-04T02:39:58.464653",
- "cpu_percent": 12.97,
- "ram_percent": 58.4,
- "ram_used_gb": 37.12955093383789,
- "gpu_memory_used": 4416.0,
- "relative_time": 126.34849739074707
- },
- {
- "timestamp": "2025-01-04T02:39:59.521737",
- "cpu_percent": 11.69,
- "ram_percent": 58.4,
- "ram_used_gb": 37.139190673828125,
- "gpu_memory_used": 4416.0,
- "relative_time": 127.40602111816406
- },
- {
- "timestamp": "2025-01-04T02:40:00.581455",
- "cpu_percent": 12.86,
- "ram_percent": 58.5,
- "ram_used_gb": 37.204673767089844,
- "gpu_memory_used": 4418.0,
- "relative_time": 128.5798671245575
- },
- {
- "timestamp": "2025-01-04T02:40:01.760893",
- "cpu_percent": 81.59,
- "ram_percent": 58.1,
- "ram_used_gb": 36.97315216064453,
- "gpu_memory_used": 4425.0,
- "relative_time": 129.6643455028534
- },
- {
- "timestamp": "2025-01-04T02:40:02.850000",
- "cpu_percent": 14.55,
- "ram_percent": 58.2,
- "ram_used_gb": 37.0354118347168,
- "gpu_memory_used": 4435.0,
- "relative_time": 130.7529788017273
- },
- {
- "timestamp": "2025-01-04T02:40:03.934586",
- "cpu_percent": 13.16,
- "ram_percent": 58.2,
- "ram_used_gb": 37.03787612915039,
- "gpu_memory_used": 4437.0,
- "relative_time": 131.81812405586243
- },
- {
- "timestamp": "2025-01-04T02:40:04.989915",
- "cpu_percent": 11.0,
- "ram_percent": 58.3,
- "ram_used_gb": 37.09538650512695,
- "gpu_memory_used": 4437.0,
- "relative_time": 132.88244915008545
- },
- {
- "timestamp": "2025-01-04T02:40:06.067559",
- "cpu_percent": 32.79,
- "ram_percent": 58.3,
- "ram_used_gb": 37.07184982299805,
- "gpu_memory_used": 4437.0,
- "relative_time": 133.97513842582703
- },
- {
- "timestamp": "2025-01-04T02:40:07.156881",
- "cpu_percent": 22.34,
- "ram_percent": 58.3,
- "ram_used_gb": 37.07517623901367,
- "gpu_memory_used": 4438.0,
- "relative_time": 135.04176831245422
- },
- {
- "timestamp": "2025-01-04T02:40:08.212791",
- "cpu_percent": 14.86,
- "ram_percent": 58.3,
- "ram_used_gb": 37.08013153076172,
- "gpu_memory_used": 4438.0,
- "relative_time": 136.1427457332611
- },
- {
- "timestamp": "2025-01-04T02:40:09.317884",
- "cpu_percent": 21.55,
- "ram_percent": 58.4,
- "ram_used_gb": 37.144142150878906,
- "gpu_memory_used": 4447.0,
- "relative_time": 137.20455360412598
- },
- {
- "timestamp": "2025-01-04T02:40:10.390292",
- "cpu_percent": 26.97,
- "ram_percent": 58.4,
- "ram_used_gb": 37.141868591308594,
- "gpu_memory_used": 4454.0,
- "relative_time": 138.2930736541748
- },
- {
- "timestamp": "2025-01-04T02:40:11.464548",
- "cpu_percent": 12.21,
- "ram_percent": 58.5,
- "ram_used_gb": 37.205867767333984,
- "gpu_memory_used": 4451.0,
- "relative_time": 139.35198616981506
- },
- {
- "timestamp": "2025-01-04T02:40:12.537687",
- "cpu_percent": 13.14,
- "ram_percent": 58.5,
- "ram_used_gb": 37.233299255371094,
- "gpu_memory_used": 4452.0,
- "relative_time": 140.4236707687378
- },
- {
- "timestamp": "2025-01-04T02:40:13.608158",
- "cpu_percent": 13.93,
- "ram_percent": 58.6,
- "ram_used_gb": 37.25624465942383,
- "gpu_memory_used": 4452.0,
- "relative_time": 141.4932518005371
- },
- {
- "timestamp": "2025-01-04T02:40:14.668199",
- "cpu_percent": 15.76,
- "ram_percent": 58.6,
- "ram_used_gb": 37.278499603271484,
- "gpu_memory_used": 4452.0,
- "relative_time": 142.57055759429932
- },
- {
- "timestamp": "2025-01-04T02:40:15.754506",
- "cpu_percent": 14.59,
- "ram_percent": 58.5,
- "ram_used_gb": 37.21017837524414,
- "gpu_memory_used": 4451.0,
- "relative_time": 143.64187097549438
- },
+ "timestamp": "2025-01-05T23:59:01.358046",
+ "cpu_percent": 34.9,
+ "ram_percent": 77.6,
+ "ram_used_gb": 49.3621711730957,
+ "gpu_memory_used": 6921.0,
+ "relative_time": 60.10921263694763
+ },
{
- "timestamp": "2025-01-04T02:40:16.827392",
- "cpu_percent": 17.55,
- "ram_percent": 58.4,
- "ram_used_gb": 37.1205940246582,
- "gpu_memory_used": 4450.0,
- "relative_time": 144.75147438049316
- },
+ "timestamp": "2025-01-05T23:59:02.434159",
+ "cpu_percent": 30.19,
+ "ram_percent": 77.6,
+ "ram_used_gb": 49.35695266723633,
+ "gpu_memory_used": 6919.0,
+ "relative_time": 61.16992497444153
+ },
{
- "timestamp": "2025-01-04T02:40:17.929858",
- "cpu_percent": 55.09,
- "ram_percent": 58.2,
- "ram_used_gb": 37.04216766357422,
- "gpu_memory_used": 4449.0,
- "relative_time": 145.815936088562
- },
+ "timestamp": "2025-01-05T23:59:03.495845",
+ "cpu_percent": 33.29,
+ "ram_percent": 77.9,
+ "ram_used_gb": 49.53657531738281,
+ "gpu_memory_used": 6916.0,
+ "relative_time": 62.276703119277954
+ },
{
- "timestamp": "2025-01-04T02:40:18.988009",
- "cpu_percent": 13.92,
- "ram_percent": 58.0,
- "ram_used_gb": 36.90068054199219,
- "gpu_memory_used": 4447.0,
- "relative_time": 146.8880341053009
- },
+ "timestamp": "2025-01-05T23:59:04.602367",
+ "cpu_percent": 43.51,
+ "ram_percent": 77.9,
+ "ram_used_gb": 49.58420181274414,
+ "gpu_memory_used": 6919.0,
+ "relative_time": 63.39539980888367
+ },
{
- "timestamp": "2025-01-04T02:40:20.062567",
- "cpu_percent": 17.42,
- "ram_percent": 57.7,
- "ram_used_gb": 36.69455337524414,
- "gpu_memory_used": 4446.0,
- "relative_time": 147.96440315246582
- },
+ "timestamp": "2025-01-05T23:59:05.720517",
+ "cpu_percent": 40.01,
+ "ram_percent": 78.1,
+ "ram_used_gb": 49.65230941772461,
+ "gpu_memory_used": 6919.0,
+ "relative_time": 64.45865726470947
+ },
{
- "timestamp": "2025-01-04T02:40:21.149129",
- "cpu_percent": 14.78,
- "ram_percent": 57.4,
- "ram_used_gb": 36.50687789916992,
- "gpu_memory_used": 4445.0,
- "relative_time": 149.041100025177
- },
+ "timestamp": "2025-01-05T23:59:06.783467",
+ "cpu_percent": 30.16,
+ "ram_percent": 78.1,
+ "ram_used_gb": 49.673973083496094,
+ "gpu_memory_used": 6918.0,
+ "relative_time": 65.54732704162598
+ },
{
- "timestamp": "2025-01-04T02:40:22.221780",
- "cpu_percent": 11.1,
- "ram_percent": 57.0,
- "ram_used_gb": 36.28267288208008,
- "gpu_memory_used": 4438.0,
- "relative_time": 150.125506401062
- },
+ "timestamp": "2025-01-05T23:59:07.872827",
+ "cpu_percent": 22.64,
+ "ram_percent": 78.0,
+ "ram_used_gb": 49.64229202270508,
+ "gpu_memory_used": 6918.0,
+ "relative_time": 66.6279137134552
+ },
{
- "timestamp": "2025-01-04T02:40:23.308492",
- "cpu_percent": 12.21,
- "ram_percent": 56.7,
- "ram_used_gb": 36.036773681640625,
- "gpu_memory_used": 4436.0,
- "relative_time": 151.19524502754211
- },
+ "timestamp": "2025-01-05T23:59:08.954513",
+ "cpu_percent": 45.52,
+ "ram_percent": 77.9,
+ "ram_used_gb": 49.53507995605469,
+ "gpu_memory_used": 6922.0,
+ "relative_time": 67.7145938873291
+ },
{
- "timestamp": "2025-01-04T02:40:24.381177",
- "cpu_percent": 13.79,
- "ram_percent": 56.3,
- "ram_used_gb": 35.83684539794922,
- "gpu_memory_used": 4436.0,
- "relative_time": 152.26534175872803
- },
+ "timestamp": "2025-01-05T23:59:10.039422",
+ "cpu_percent": 40.01,
+ "ram_percent": 77.0,
+ "ram_used_gb": 48.967567443847656,
+ "gpu_memory_used": 6923.0,
+ "relative_time": 68.80349135398865
+ },
{
- "timestamp": "2025-01-04T02:40:25.452457",
- "cpu_percent": 12.28,
- "ram_percent": 56.4,
- "ram_used_gb": 35.848087310791016,
- "gpu_memory_used": 4436.0,
- "relative_time": 153.33880996704102
- },
+ "timestamp": "2025-01-05T23:59:11.129835",
+ "cpu_percent": 36.44,
+ "ram_percent": 71.2,
+ "ram_used_gb": 45.274654388427734,
+ "gpu_memory_used": 6923.0,
+ "relative_time": 69.87114715576172
+ },
{
- "timestamp": "2025-01-04T02:40:26.521613",
- "cpu_percent": 12.52,
- "ram_percent": 56.8,
- "ram_used_gb": 36.1606330871582,
- "gpu_memory_used": 4440.0,
- "relative_time": 154.40920901298523
- },
+ "timestamp": "2025-01-05T23:59:12.203989",
+ "cpu_percent": 38.48,
+ "ram_percent": 71.5,
+ "ram_used_gb": 45.46482849121094,
+ "gpu_memory_used": 6921.0,
+ "relative_time": 70.97379231452942
+ },
{
- "timestamp": "2025-01-04T02:40:27.587547",
- "cpu_percent": 7.94,
- "ram_percent": 57.2,
- "ram_used_gb": 36.37208557128906,
- "gpu_memory_used": 4440.0,
- "relative_time": 155.46942234039307
- },
+ "timestamp": "2025-01-05T23:59:13.331638",
+ "cpu_percent": 29.37,
+ "ram_percent": 71.4,
+ "ram_used_gb": 45.428951263427734,
+ "gpu_memory_used": 6924.0,
+ "relative_time": 72.11824035644531
+ },
{
- "timestamp": "2025-01-04T02:40:28.647400",
- "cpu_percent": 8.85,
- "ram_percent": 57.3,
- "ram_used_gb": 36.470054626464844,
- "gpu_memory_used": 4440.0,
- "relative_time": 156.53129720687866
+ "timestamp": "2025-01-05T23:59:14.510728",
+ "cpu_percent": 29.0,
+ "ram_percent": 71.1,
+ "ram_used_gb": 45.226200103759766,
+ "gpu_memory_used": 6916.0,
+ "relative_time": 73.27950477600098
}
],
- "test_duration": 159.19756031036377
+ "test_duration": 76.54887413978577
}
\ No newline at end of file
diff --git a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt
index cb2df6a..91be8e5 100644
--- a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt
+++ b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt
@@ -1,23 +1,23 @@
=== Benchmark Statistics (with correct RTF) ===
-Total tokens processed: 17150
-Total audio generated (s): 5296.38
-Total test duration (s): 155.23
-Average processing rate (tokens/s): 102.86
-Average RTF: 0.03
-Average Real Time Speed: 31.25
+Total tokens processed: 3150
+Total audio generated (s): 1056.03
+Total test duration (s): 70.20
+Average processing rate (tokens/s): 46.46
+Average RTF: 0.07
+Average Real Time Speed: 15.00
=== Per-chunk Stats ===
-Average chunk size (tokens): 1715.00
+Average chunk size (tokens): 525.00
Min chunk size (tokens): 150
-Max chunk size (tokens): 5000
-Average processing time (s): 15.39
-Average output length (s): 529.64
+Max chunk size (tokens): 900
+Average processing time (s): 11.57
+Average output length (s): 176.00
=== Performance Ranges ===
-Processing rate range (tokens/s): 80.65 - 125.10
-RTF range: 0.03x - 0.04x
-Real Time Speed range: 25.00x - 33.33x
+Processing rate range (tokens/s): 40.07 - 53.57
+RTF range: 0.06x - 0.08x
+Real Time Speed range: 12.50x - 16.67x
diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png
index 339c896..2317c52 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png
index 3e5c8d1..919c53b 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png
index e209978..5143bda 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_latency.png b/examples/assorted_checks/benchmarks/output_plots/first_token_latency.png
deleted file mode 100644
index d969fb8..0000000
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_latency.png and /dev/null differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream.png b/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream.png
index 4ed4b65..75c1a19 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream.png and b/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream_openai.png b/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream_openai.png
index 56f6e31..c78f28c 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream_openai.png and b/examples/assorted_checks/benchmarks/output_plots/first_token_latency_stream_openai.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline.png b/examples/assorted_checks/benchmarks/output_plots/first_token_timeline.png
deleted file mode 100644
index 251b172..0000000
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline.png and /dev/null differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream.png b/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream.png
index dd7162c..47ac017 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream.png and b/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream_openai.png b/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream_openai.png
index 4b36a87..64f2208 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream_openai.png and b/examples/assorted_checks/benchmarks/output_plots/first_token_timeline_stream_openai.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/format_comparison.png b/examples/assorted_checks/benchmarks/output_plots/format_comparison.png
deleted file mode 100644
index 95ac515..0000000
Binary files a/examples/assorted_checks/benchmarks/output_plots/format_comparison.png and /dev/null differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png
index 62c6864..ebf7bcf 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png
index 1c5d7b7..bcdacda 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png
index 942b3a8..a6a6ea5 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png and b/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_usage.png b/examples/assorted_checks/benchmarks/output_plots/gpu_usage.png
deleted file mode 100644
index 1bc44dc..0000000
Binary files a/examples/assorted_checks/benchmarks/output_plots/gpu_usage.png and /dev/null differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream.png b/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream.png
index e595fff..4e94aaa 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream.png and b/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream.png differ
diff --git a/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream_openai.png b/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream_openai.png
index 47cfbde..05088c3 100644
Binary files a/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream_openai.png and b/examples/assorted_checks/benchmarks/output_plots/total_time_latency_stream_openai.png differ
diff --git a/examples/assorted_checks/generate_readme_plots.py b/examples/assorted_checks/generate_readme_plots.py
new file mode 100644
index 0000000..a6e5da2
--- /dev/null
+++ b/examples/assorted_checks/generate_readme_plots.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+"""Script to generate all plots needed for the README."""
+
+import os
+import sys
+import shutil
+from pathlib import Path
+
+from validate_wav import validate_tts
+
+# Resolve this script's directory and the project root two levels above it
+script_dir = Path(__file__).parent.resolve()
+project_root = script_dir.parent.parent
+
+# Temporarily put the script directory and its benchmarks/ subdirectory on sys.path
+sys.path.append(str(script_dir))
+sys.path.append(str(script_dir / "benchmarks"))
+
+# Import each test/benchmark script's main() under a descriptive alias
+from benchmark_tts_rtf import main as benchmark_rtf
+from test_formats.test_audio_formats import main as test_formats
+from benchmark_first_token_stream_unified import main as benchmark_stream
+from test_combinations.test_analyze_combined_voices import main as test_voice_analysis
+
+# Undo the sys.path additions now that the imports above have been resolved
+sys.path.remove(str(script_dir))
+sys.path.remove(str(script_dir / "benchmarks"))
+
+
+def ensure_assets_dir():
+    """Create the project's assets directory if it is missing and return its path."""
+    assets_dir = project_root / "assets"
+    assets_dir.mkdir(exist_ok=True)
+    return assets_dir
+
+
+def copy_plot(src_path: str, dest_name: str, assets_dir: Path):
+    """Copy src_path to assets_dir / dest_name, or print a warning if it is missing."""
+    if os.path.exists(src_path):
+        shutil.copy2(src_path, assets_dir / dest_name)
+        print(f"Copied {src_path} to {assets_dir / dest_name}")
+    else:
+        print(f"Warning: Source plot not found at {src_path}")
+
+
+def validate_and_print(wav_path: str, category: str):
+    """Run validate_tts on wav_path and print its metrics and issues under a header."""
+    if not os.path.exists(wav_path):
+        print(f"Warning: WAV file not found at {wav_path}")
+        return
+
+    print(f"\n=== Validating {category} Audio ===")
+    result = validate_tts(wav_path)
+
+    if "error" in result:
+        print(f"Error: {result['error']}")
+    else:
+        print(f"Duration: {result['duration']}")
+        print(f"Sample Rate: {result['sample_rate']} Hz")
+        print(f"Peak Amplitude: {result['peak_amplitude']}")
+        print(f"RMS Level: {result['rms_level']}")
+
+        if result["issues"]:
+            print("\nIssues Found:")
+            for issue in result["issues"]:
+                print(f"- {issue}")
+        else:
+            print("\nNo issues found")
+
+
+def main():
+    """Run each check and benchmark, copy its plots into assets, and validate sample WAVs."""
+    # Filename prefix for the benchmark plots; then make sure the assets directory exists
+    prefix = "gpu"
+    assets_dir = ensure_assets_dir()
+
+    print("\n=== Generating Format Comparison Plot ===")
+    test_formats()
+    copy_plot(
+        str(script_dir / "test_formats/output/test_formats/format_comparison.png"),
+        "format_comparison.png",
+        assets_dir,
+    )
+    # Validate WAV output from format test
+    validate_and_print(
+        str(script_dir / "test_formats/output/test_formats/speech.wav"),
+        "Format Test WAV",
+    )
+
+    print("\n=== Generating Voice Analysis Plot ===")
+    test_voice_analysis()
+    copy_plot(
+        str(script_dir / "test_combinations/output/analysis_comparison.png"),
+        "voice_analysis.png",
+        assets_dir,
+    )
+    # Validate combined voice output
+    validate_and_print(
+        str(
+            script_dir
+            / "test_combinations/output/analysis_combined_af_bella_af_nicole.wav"
+        ),
+        "Combined Voice",
+    )
+
+    print("\n=== Generating Performance Benchmark Plots ===")
+    benchmark_rtf()
+    copy_plot(
+        str(script_dir / f"benchmarks/output_plots/{prefix}_processing_time_rtf.png"),
+        f"{prefix}_processing_time.png",
+        assets_dir,
+    )
+    copy_plot(
+        str(script_dir / f"benchmarks/output_plots/{prefix}_realtime_factor_rtf.png"),
+        f"{prefix}_realtime_factor.png",
+        assets_dir,
+    )
+    # Validate RTF benchmark output (the 450-token chunk)
+    validate_and_print(
+        str(script_dir / "benchmarks/output_audio/chunk_450_tokens.wav"),
+        "RTF Benchmark",
+    )
+
+    print("\n=== Generating Streaming Benchmark Plots ===")
+    benchmark_stream()
+
+    # Copy direct streaming plots
+    copy_plot(
+        str(script_dir / "benchmarks/output_plots/first_token_latency_stream.png"),
+        f"{prefix}_first_token_latency_direct.png",
+        assets_dir,
+    )
+    copy_plot(
+        str(script_dir / "benchmarks/output_plots/first_token_timeline_stream.png"),
+        f"{prefix}_first_token_timeline_direct.png",
+        assets_dir,
+    )
+    copy_plot(
+        str(script_dir / "benchmarks/output_plots/total_time_latency_stream.png"),
+        f"{prefix}_total_time_latency_direct.png",
+        assets_dir,
+    )
+
+    # Copy OpenAI streaming plots
+    copy_plot(
+        str(
+            script_dir / "benchmarks/output_plots/first_token_latency_stream_openai.png"
+        ),
+        f"{prefix}_first_token_latency_openai.png",
+        assets_dir,
+    )
+    copy_plot(
+        str(
+            script_dir
+            / "benchmarks/output_plots/first_token_timeline_stream_openai.png"
+        ),
+        f"{prefix}_first_token_timeline_openai.png",
+        assets_dir,
+    )
+    copy_plot(
+        str(
+            script_dir / "benchmarks/output_plots/total_time_latency_stream_openai.png"
+        ),
+        f"{prefix}_total_time_latency_openai.png",
+        assets_dir,
+    )
+
+    # Pause briefly before validating, in case the streaming WAVs are still being written
+    import time
+
+    time.sleep(2)
+
+    # Validate streaming outputs (the 500-token, run 1 files)
+    validate_and_print(
+        str(
+            script_dir
+            / "benchmarks/output_audio_stream/benchmark_tokens500_run1_stream.wav"
+        ),
+        "Direct Streaming",
+    )
+    validate_and_print(
+        str(
+            script_dir
+            / "benchmarks/output_audio_stream_openai/benchmark_tokens500_run1_stream_openai.wav"
+        ),
+        "OpenAI Streaming",
+    )
+    # NOTE(review): reuses the "Format Test WAV" label for test_audio.wav; confirm intended
+    validate_and_print(
+        str(script_dir / "test_formats/output/test_formats/test_audio.wav"),
+        "Format Test WAV",
+    )
+
+    print("\nAll plots have been generated and copied to the assets directory")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py b/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py
index ec280e2..134f554 100644
--- a/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py
+++ b/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py
@@ -73,6 +73,7 @@ def generate_speech(
"voice": voice,
"speed": 1.0,
"response_format": "wav", # Use WAV for analysis
+ "stream": False,
},
)
@@ -193,9 +194,10 @@ def plot_analysis(audio_files: Dict[str, str], output_dir: str):
fig.patch.set_facecolor("#1a1a2e")
num_files = len(audio_files)
- # Create subplot grid with proper spacing
+ # Create subplot grid with proper spacing for waveforms and metrics
+ total_rows = num_files + 2 # Add one more row for metrics
gs = plt.GridSpec(
- num_files + 1, 2, height_ratios=[1.5] * num_files + [1], hspace=0.4, wspace=0.3
+ total_rows, 2, height_ratios=[1.5] * num_files + [1, 1], hspace=0.4, wspace=0.3
)
# Analyze all files first
@@ -216,48 +218,74 @@ def plot_analysis(audio_files: Dict[str, str], output_dir: str):
# Colors for voices
colors = ["#ff2a6d", "#05d9e8", "#d1f7ff"]
- # Create two subplots for metrics with similar scales
- # Left subplot: Brightness and Volume
- ax1 = plt.subplot(gs[num_files, 0])
- metrics1 = [
+ # Create metrics for each subplot
+ metrics = [
(
- "Brightness",
- [chars["spectral_centroid"] / 1000 for chars in all_chars.values()],
- "kHz",
- ),
- ("Volume", [chars["rms"] * 100 for chars in all_chars.values()], "RMS×100"),
- ]
-
- # Right subplot: Voice Pitch and Texture
- ax2 = plt.subplot(gs[num_files, 1])
- metrics2 = [
- (
- "Voice Pitch",
- [min(chars["dominant_frequencies"]) for chars in all_chars.values()],
- "Hz",
+ plt.subplot(gs[num_files, 0]),
+ [
+ (
+ "Volume",
+ [chars["rms"] * 100 for chars in all_chars.values()],
+ "RMS×100",
+ )
+ ],
),
(
- "Texture",
- [chars["zero_crossing_rate"] * 1000 for chars in all_chars.values()],
- "ZCR×1000",
+ plt.subplot(gs[num_files, 1]),
+ [
+ (
+ "Brightness",
+ [chars["spectral_centroid"] / 1000 for chars in all_chars.values()],
+ "kHz",
+ )
+ ],
+ ),
+ (
+ plt.subplot(gs[num_files + 1, 0]),
+ [
+ (
+ "Voice Pitch",
+ [
+ min(chars["dominant_frequencies"])
+ for chars in all_chars.values()
+ ],
+ "Hz",
+ )
+ ],
+ ),
+ (
+ plt.subplot(gs[num_files + 1, 1]),
+ [
+ (
+ "Texture",
+ [
+ chars["zero_crossing_rate"] * 1000
+ for chars in all_chars.values()
+ ],
+ "ZCR×1000",
+ )
+ ],
),
]
- def plot_grouped_bars(ax, metrics, show_legend=True):
- n_groups = len(metrics)
+ # Plot each metric
+ for i, (ax, metric_data) in enumerate(metrics):
n_voices = len(audio_files)
bar_width = 0.25
+ indices = np.array([0])
- indices = np.arange(n_groups)
+ values = metric_data[0][1]
+ max_val = max(values)
- # Get max value for y-axis scaling
- max_val = max(max(m[1]) for m in metrics)
-
- for i, (voice, color) in enumerate(zip(audio_files.keys(), colors)):
- values = [m[1][i] for m in metrics]
- offset = (i - n_voices / 2 + 0.5) * bar_width
+ for j, (voice, color) in enumerate(zip(audio_files.keys(), colors)):
+ offset = (j - n_voices / 2 + 0.5) * bar_width
bars = ax.bar(
- indices + offset, values, bar_width, label=voice, color=color, alpha=0.8
+ indices + offset,
+ [values[j]],
+ bar_width,
+ label=voice,
+ color=color,
+ alpha=0.8,
)
# Add value labels on top of bars
@@ -274,12 +302,12 @@ def plot_analysis(audio_files: Dict[str, str], output_dir: str):
)
ax.set_xticks(indices)
- ax.set_xticklabels([f"{m[0]}\n({m[2]})" for m in metrics])
-
- # Set y-axis limits with some padding
+ ax.set_xticklabels([f"{metric_data[0][0]}\n({metric_data[0][2]})"])
ax.set_ylim(0, max_val * 1.2)
+ ax.set_ylabel("Value")
- if show_legend:
+ # Only show legend on first metric plot
+ if i == 0:
ax.legend(
bbox_to_anchor=(1.05, 1),
loc="upper left",
@@ -287,22 +315,11 @@ def plot_analysis(audio_files: Dict[str, str], output_dir: str):
edgecolor="#ffffff",
)
- # Plot both subplots
- plot_grouped_bars(ax1, metrics1, show_legend=True)
- plot_grouped_bars(ax2, metrics2, show_legend=False)
+ # Style the subplot
+ setup_plot(fig, ax, metric_data[0][0])
- # Style both subplots
- setup_plot(fig, ax1, "Brightness and Volume")
- setup_plot(fig, ax2, "Voice Pitch and Texture")
-
- # Add y-axis labels
- ax1.set_ylabel("Value")
- ax2.set_ylabel("Value")
-
- # Adjust the figure size to accommodate the legend
- fig.set_size_inches(15, 15)
-
- # Add padding around the entire figure
+ # Adjust the figure size and padding
+ fig.set_size_inches(15, 20)
plt.subplots_adjust(right=0.85, top=0.95, bottom=0.05, left=0.1)
plt.savefig(os.path.join(output_dir, "analysis_comparison.png"), dpi=300)
print(f"Saved analysis comparison to {output_dir}/analysis_comparison.png")
@@ -332,7 +349,7 @@ def main():
)
parser.add_argument("--url", default="http://localhost:8880", help="API base URL")
parser.add_argument(
- "--output-dir",
+ "--output-dir",
default="examples/assorted_checks/test_combinations/output",
help="Output directory for audio files",
)
diff --git a/examples/assorted_checks/test_formats/test_audio_formats.py b/examples/assorted_checks/test_formats/test_audio_formats.py
index e126dec..68156b6 100644
--- a/examples/assorted_checks/test_formats/test_audio_formats.py
+++ b/examples/assorted_checks/test_formats/test_audio_formats.py
@@ -66,26 +66,27 @@ def plot_format_comparison(stats: list, output_dir: str):
for i, stat in enumerate(stats):
format_name = stat["format"].upper()
try:
- # Handle PCM format differently
- if stat["format"] == "pcm":
- # Read raw PCM data (16-bit mono)
- with open(
- os.path.join(output_dir, f"test_audio.{stat['format']}"), "rb"
- ) as f:
- raw_data = f.read()
- data = np.frombuffer(raw_data, dtype=np.int16)
- data = data.astype(np.float32) / 32768.0 # Convert to float [-1, 1]
- sr = 24000
- else:
- # Read other formats with soundfile
- data, sr = sf.read(
- os.path.join(output_dir, f"test_audio.{stat['format']}")
- )
+ file_path = os.path.join(output_dir, f"test_audio.{stat['format']}")
- # Plot waveform
+ if stat["format"] == "wav":
+ # Use scipy.io.wavfile for WAV files
+ sr, data = wavfile.read(file_path)
+ data = data.astype(np.float32) / 32768.0 # Convert to float [-1, 1]
+ elif stat["format"] == "pcm":
+ # Read raw 16-bit signed little-endian PCM data at 24kHz
+ data = np.frombuffer(
+ open(file_path, "rb").read(), dtype=" dict:
"""Get audio file statistics"""
file_size = os.path.getsize(file_path)
file_size_kb = file_size / 1024 # Convert to KB
+ format_name = Path(file_path).suffix[1:]
- try:
- # Try reading with soundfile first
+ if format_name == "wav":
+ # Use scipy.io.wavfile for WAV files
+ sample_rate, data = wavfile.read(file_path)
+ data = data.astype(np.float32) / 32768.0 # Convert to float [-1, 1]
+ duration = len(data) / sample_rate
+ channels = 1 if len(data.shape) == 1 else data.shape[1]
+ elif format_name == "pcm":
+ # For PCM, read raw 16-bit signed little-endian PCM data at 24kHz
+ data = np.frombuffer(
+ open(file_path, "rb").read(), dtype=" List[str]:
"""Create a variety of test cases with different characteristics"""
-
+
# Helper to create random text with specific patterns
def random_text(length: int) -> str:
- return ''.join(random.choice(string.ascii_letters + string.digits + " .,!?") for _ in range(length))
-
+ return "".join(
+ random.choice(string.ascii_letters + string.digits + " .,!?")
+ for _ in range(length)
+ )
+
test_cases = []
-
+
# Base test cases that hit specific patterns
base_cases = [
"Dr. Smith and Mr. Jones discussed the $1,234.56 million investment.",
@@ -21,10 +25,10 @@ def create_test_cases() -> List[str]:
"X's and Y's properties cost £50 million in the 1990s",
"こんにちは。今日は!",
]
-
+
# Add base cases
test_cases.extend(base_cases)
-
+
# Add variations with random content
for length in [100, 1000, 10000]:
# Create 3 variations of each length
@@ -35,23 +39,24 @@ def create_test_cases() -> List[str]:
text = text.replace(text[30:40], "$1,234.56")
text = text.replace(text[50:60], "A.B.C. xyz")
test_cases.append(text)
-
+
return test_cases
+
class TextNormalizerInline:
"""Text normalizer using inline patterns"""
-
+
def normalize(self, text: str) -> str:
# Replace quotes and brackets
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
text = text.replace("«", chr(8220)).replace("»", chr(8221))
text = text.replace(chr(8220), '"').replace(chr(8221), '"')
text = text.replace("(", "«").replace(")", "»")
-
+
# Handle CJK punctuation
for a, b in zip("、。!,:;?", ",.!,:;?"):
text = text.replace(a, b + " ")
-
+
text = re.sub(r"[^\S \n]", " ", text)
text = re.sub(r" +", " ", text)
text = re.sub(r"(?<=\n) +(?=\n)", "", text)
@@ -61,108 +66,132 @@ class TextNormalizerInline:
text = re.sub(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))", "Mrs", text)
text = re.sub(r"\betc\.(?! [A-Z])", "etc", text)
text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text)
- text = re.sub(r"\d*\.\d+|\b\d{4}s?\b|(? str:
# Replace quotes and brackets
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
text = text.replace("«", chr(8220)).replace("»", chr(8221))
text = text.replace(chr(8220), '"').replace(chr(8221), '"')
text = text.replace("(", "«").replace(")", "»")
-
+
# Handle CJK punctuation
for a, b in zip("、。!,:;?", ",.!,:;?"):
text = text.replace(a, b + " ")
-
+
# Use compiled patterns
- text = self.patterns['whitespace'].sub(" ", text)
- text = self.patterns['multi_space'].sub(" ", text)
- text = self.patterns['newline_space'].sub("", text)
- text = self.patterns['doctor'].sub("Doctor", text)
- text = self.patterns['mister'].sub("Mister", text)
- text = self.patterns['miss'].sub("Miss", text)
- text = self.patterns['mrs'].sub("Mrs", text)
- text = self.patterns['etc'].sub("etc", text)
- text = self.patterns['yeah'].sub(r"\1e'a", text)
- text = self.patterns['numbers'].sub(split_num, text)
- text = self.patterns['comma_in_number'].sub("", text)
- text = self.patterns['money'].sub(handle_money, text)
- text = self.patterns['decimal'].sub(handle_decimal, text)
- text = self.patterns['range'].sub(" to ", text)
- text = self.patterns['s_after_number'].sub(" S", text)
- text = self.patterns['possessive_s'].sub("'S", text)
- text = self.patterns['x_possessive'].sub("s", text)
- text = self.patterns['initials'].sub(lambda m: m.group().replace(".", "-"), text)
- text = self.patterns['single_initial'].sub("-", text)
-
+ text = self.patterns["whitespace"].sub(" ", text)
+ text = self.patterns["multi_space"].sub(" ", text)
+ text = self.patterns["newline_space"].sub("", text)
+ text = self.patterns["doctor"].sub("Doctor", text)
+ text = self.patterns["mister"].sub("Mister", text)
+ text = self.patterns["miss"].sub("Miss", text)
+ text = self.patterns["mrs"].sub("Mrs", text)
+ text = self.patterns["etc"].sub("etc", text)
+ text = self.patterns["yeah"].sub(r"\1e'a", text)
+ text = self.patterns["numbers"].sub(split_num, text)
+ text = self.patterns["comma_in_number"].sub("", text)
+ text = self.patterns["money"].sub(handle_money, text)
+ text = self.patterns["decimal"].sub(handle_decimal, text)
+ text = self.patterns["range"].sub(" to ", text)
+ text = self.patterns["s_after_number"].sub(" S", text)
+ text = self.patterns["possessive_s"].sub("'S", text)
+ text = self.patterns["x_possessive"].sub("s", text)
+ text = self.patterns["initials"].sub(
+ lambda m: m.group().replace(".", "-"), text
+ )
+ text = self.patterns["single_initial"].sub("-", text)
+
return text.strip()
+
class TextNormalizerHybrid:
"""Text normalizer using hybrid approach - compile only complex/frequent patterns"""
-
+
def __init__(self):
# Only compile patterns that are complex or frequently used
self.patterns = {
- 'whitespace': re.compile(r"[^\S \n]"),
- 'numbers': re.compile(r"\d*\.\d+|\b\d{4}s?\b|(? str:
# Replace quotes and brackets
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
text = text.replace("«", chr(8220)).replace("»", chr(8221))
text = text.replace(chr(8220), '"').replace(chr(8221), '"')
text = text.replace("(", "«").replace(")", "»")
-
+
# Handle CJK punctuation
for a, b in zip("、。!,:;?", ",.!,:;?"):
text = text.replace(a, b + " ")
-
+
# Use compiled patterns for complex operations
- text = self.patterns['whitespace'].sub(" ", text)
- text = self.patterns['numbers'].sub(split_num, text)
- text = self.patterns['money'].sub(handle_money, text)
- text = self.patterns['initials'].sub(lambda m: m.group().replace(".", "-"), text)
-
+ text = self.patterns["whitespace"].sub(" ", text)
+ text = self.patterns["numbers"].sub(split_num, text)
+ text = self.patterns["money"].sub(handle_money, text)
+ text = self.patterns["initials"].sub(
+ lambda m: m.group().replace(".", "-"), text
+ )
+
# Use inline patterns for simpler operations
text = re.sub(r" +", " ", text)
text = re.sub(r"(?<=\n) +(?=\n)", "", text)
@@ -179,9 +208,10 @@ class TextNormalizerHybrid:
text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
text = re.sub(r"(?<=X')S\b", "s", text)
text = re.sub(r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)
-
+
return text.strip()
+
def split_num(match: re.Match) -> str:
"""Split numbers for TTS processing"""
num = match.group(0)
@@ -192,61 +222,70 @@ def split_num(match: re.Match) -> str:
return f"{num[:-1]} s"
return num
+
def handle_money(match: re.Match) -> str:
"""Format money strings for TTS"""
text = match.group(0)
return text.replace("$", " dollars ").replace("£", " pounds ")
+
def handle_decimal(match: re.Match) -> str:
"""Format decimal numbers for TTS"""
num = match.group(0)
return num.replace(".", " point ")
-def benchmark_normalizers(test_cases: List[str], iterations: int = 100) -> Tuple[float, float, float]:
+
+def benchmark_normalizers(
+ test_cases: List[str], iterations: int = 100
+) -> Tuple[float, float, float]:
"""Benchmark all three implementations"""
-
+
normalizers = {
- 'inline': TextNormalizerInline(),
- 'compiled': TextNormalizerCompiled(),
- 'hybrid': TextNormalizerHybrid()
+ "inline": TextNormalizerInline(),
+ "compiled": TextNormalizerCompiled(),
+ "hybrid": TextNormalizerHybrid(),
}
-
+
results = {}
-
+
# Test each normalizer
for name, normalizer in normalizers.items():
start = time.perf_counter()
-
+
# Run normalizations
for _ in range(iterations):
for test in test_cases:
normalizer.normalize(test)
-
+
results[name] = time.perf_counter() - start
-
+
return results
+
def verify_outputs(test_cases: List[str]) -> bool:
"""Verify that all implementations produce identical output"""
normalizers = {
- 'inline': TextNormalizerInline(),
- 'compiled': TextNormalizerCompiled(),
- 'hybrid': TextNormalizerHybrid()
+ "inline": TextNormalizerInline(),
+ "compiled": TextNormalizerCompiled(),
+ "hybrid": TextNormalizerHybrid(),
}
-
+
for test in test_cases:
results = [norm.normalize(test) for norm in normalizers.values()]
if not all(r == results[0] for r in results):
return False
return True
+
def main():
# Create test cases
print("Generating test cases...")
test_cases = create_test_cases()
total_chars = sum(len(t) for t in test_cases)
- print(f"Created {len(test_cases)} test cases, total size: {total_chars:,} characters")
-
+ print(
+ f"Created {len(test_cases)} test cases, total size: {total_chars:,} characters"
+ )
+
# Verify output consistency
print("\nVerifying output consistency...")
if verify_outputs(test_cases):
@@ -254,15 +293,16 @@ def main():
else:
print("✗ Warning: Implementations produce different outputs!")
return
-
+
# Run benchmarks
print("\nRunning benchmarks...")
iterations = 100
results = benchmark_normalizers(test_cases, iterations)
-
+
# Print results
print(f"\nResults for {iterations} iterations: ")
for name, time_taken in results.items():
print(f"{name.capitalize()}: {time_taken:.3f}s")
-main()
\ No newline at end of file
+
+main()
diff --git a/examples/assorted_checks/validate_wav.py b/examples/assorted_checks/validate_wav.py
index 20122ef..844655a 100644
--- a/examples/assorted_checks/validate_wav.py
+++ b/examples/assorted_checks/validate_wav.py
@@ -1,8 +1,11 @@
+import argparse
+from typing import Any, Dict
+from pathlib import Path
+
import numpy as np
import soundfile as sf
-import argparse
-from pathlib import Path
-from typing import Dict, Any
+from tqdm import tqdm
+
def validate_tts(wav_path: str) -> dict:
"""
@@ -13,34 +16,40 @@ def validate_tts(wav_path: str) -> dict:
audio, sr = sf.read(wav_path)
if len(audio.shape) > 1:
audio = np.mean(audio, axis=1)
-
+
duration = len(audio) / sr
issues = []
-
+
# Basic quality checks
abs_audio = np.abs(audio)
stats = {
- 'rms': float(np.sqrt(np.mean(audio**2))),
- 'peak': float(np.max(abs_audio)),
- 'dc_offset': float(np.mean(audio))
+ "rms": float(np.sqrt(np.mean(audio**2))),
+ "peak": float(np.max(abs_audio)),
+ "dc_offset": float(np.mean(audio)),
}
-
+
clip_count = np.sum(abs_audio >= 0.99)
clip_percent = (clip_count / len(audio)) * 100
-
+
if duration < 0.1:
- issues.append("WARNING: Audio is suspiciously short - possible failed generation")
-
- if stats['peak'] >= 1.0:
+ issues.append(
+ "WARNING: Audio is suspiciously short - possible failed generation"
+ )
+
+ if stats["peak"] >= 1.0:
if clip_percent > 1.0:
- issues.append(f"WARNING: Significant clipping detected ({clip_percent:.2e}% of samples)")
+ issues.append(
+ f"WARNING: Significant clipping detected ({clip_percent:.2e}% of samples)"
+ )
elif clip_percent > 0.01:
- issues.append(f"INFO: Minor peak limiting detected ({clip_percent:.2e}% of samples)")
-
- if stats['rms'] < 0.01:
+ issues.append(
+ f"INFO: Minor peak limiting detected ({clip_percent:.2e}% of samples)"
+ )
+
+ if stats["rms"] < 0.01:
issues.append("WARNING: Audio is very quiet - possible failed generation")
-
- if abs(stats['dc_offset']) > 0.1:
+
+ if abs(stats["dc_offset"]) > 0.1:
issues.append(f"WARNING: High DC offset ({stats['dc_offset']:.3f})")
# Check for long silence gaps
@@ -51,66 +60,79 @@ def validate_tts(wav_path: str) -> dict:
window_size = int(min_silence * sr)
silence_count = 0
last_silence = -1
-
+
start_idx = int(0.2 * sr) # Skip first 0.2s
- for i in range(start_idx, len(db) - window_size, window_size):
- window = db[i:i+window_size]
+ for i in tqdm(
+ range(start_idx, len(db) - window_size, window_size),
+ desc="Checking for silence",
+ ):
+ window = db[i : i + window_size]
if np.mean(window) < silence_threshold:
silent_ratio = np.mean(window < silence_threshold)
if silent_ratio > 0.9:
- if last_silence == -1 or (i/sr - last_silence) > 2.0:
+ if last_silence == -1 or (i / sr - last_silence) > 2.0:
silence_count += 1
- last_silence = i/sr
- issues.append(f"WARNING: Long silence detected at {i/sr:.2f}s (duration: {min_silence:.1f}s)")
-
+ last_silence = i / sr
+ issues.append(
+ f"WARNING: Long silence detected at {i/sr:.2f}s (duration: {min_silence:.1f}s)"
+ )
+
if silence_count > 2:
- issues.append(f"WARNING: Multiple long silences found ({silence_count} total)")
+ issues.append(
+ f"WARNING: Multiple long silences found ({silence_count} total)"
+ )
# Detect audio artifacts
diff = np.diff(audio)
abs_diff = np.abs(diff)
window_size = min(int(0.005 * sr), 256)
- window = np.ones(window_size)/window_size
- local_avg_diff = np.convolve(abs_diff, window, mode='same')
-
+ window = np.ones(window_size) / window_size
+ local_avg_diff = np.convolve(abs_diff, window, mode="same")
+
spikes = (abs_diff > (10 * local_avg_diff)) & (abs_diff > 0.1)
artifact_indices = np.nonzero(spikes)[0]
-
+
artifacts = []
if len(artifact_indices) > 0:
gaps = np.diff(artifact_indices)
min_gap = int(0.005 * sr)
break_points = np.nonzero(gaps > min_gap)[0] + 1
groups = np.split(artifact_indices, break_points)
-
+
for group in groups:
if len(group) >= 5:
severity = np.max(abs_diff[group])
if severity > 0.2:
- center_idx = group[len(group)//2]
- artifacts.append({
- 'time': float(center_idx/sr), # Ensure float for consistent timing
- 'severity': float(severity)
- })
+ center_idx = group[len(group) // 2]
+ artifacts.append(
+ {
+ "time": float(
+ center_idx / sr
+ ), # Ensure float for consistent timing
+ "severity": float(severity),
+ }
+ )
issues.append(
f"WARNING: Audio discontinuity at {center_idx/sr:.3f}s "
f"(severity: {severity:.3f})"
)
# Check for repeated speech segments
- for chunk_duration in [5.0, 10.0]:
+ for chunk_duration in tqdm(
+ [0.5, 2.5, 5.0, 10.0], desc="Checking for repeated speech"
+ ):
chunk_size = int(chunk_duration * sr)
overlap = int(0.2 * chunk_size)
-
- for i in range(0, len(audio) - 2*chunk_size, overlap):
- chunk1 = audio[i:i+chunk_size]
- chunk2 = audio[i+chunk_size:i+2*chunk_size]
-
+
+ for i in range(0, len(audio) - 2 * chunk_size, overlap):
+ chunk1 = audio[i : i + chunk_size]
+ chunk2 = audio[i + chunk_size : i + 2 * chunk_size]
+
if np.mean(np.abs(chunk1)) < 0.01 or np.mean(np.abs(chunk2)) < 0.01:
continue
-
+
try:
- correlation = np.corrcoef(chunk1, chunk2)[0,1]
+ correlation = np.corrcoef(chunk1, chunk2)[0, 1]
if not np.isnan(correlation) and correlation > 0.92:
issues.append(
f"WARNING: Possible repeated speech at {i/sr:.1f}s "
@@ -128,92 +150,113 @@ def validate_tts(wav_path: str) -> dict:
"rms_level": f"{stats['rms']:.3f}",
"dc_offset": f"{stats['dc_offset']:.3f}",
"artifact_count": len(artifacts),
- "artifact_locations": [a['time'] for a in artifacts],
- "artifact_severities": [a['severity'] for a in artifacts],
+ "artifact_locations": [a["time"] for a in artifacts],
+ "artifact_severities": [a["severity"] for a in artifacts],
"issues": issues,
- "valid": len(issues) == 0
- }
-
- except Exception as e:
- return {
- "file": wav_path,
- "error": str(e),
- "valid": False
+ "valid": len(issues) == 0,
}
-def generate_analysis_plots(wav_path: str, output_dir: str, validation_result: Dict[str, Any]):
+ except Exception as e:
+ return {"file": wav_path, "error": str(e), "valid": False}
+
+
+def generate_analysis_plots(
+ wav_path: str, output_dir: str, validation_result: Dict[str, Any]
+):
"""
Generate analysis plots for audio file with time-aligned visualizations.
"""
import matplotlib.pyplot as plt
from scipy.signal import spectrogram
-
+
# Load audio
audio, sr = sf.read(wav_path)
if len(audio.shape) > 1:
audio = np.mean(audio, axis=1)
-
+
# Create figure with shared x-axis
fig = plt.figure(figsize=(15, 8))
gs = plt.GridSpec(2, 1, height_ratios=[1.2, 0.8], hspace=0.1)
ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1], sharex=ax1)
-
+
# Calculate spectrogram
nperseg = 2048
noverlap = 1536
- f, t, Sxx = spectrogram(audio, sr, nperseg=nperseg, noverlap=noverlap,
- window='hann', scaling='spectrum')
-
+ f, t, Sxx = spectrogram(
+ audio, sr, nperseg=nperseg, noverlap=noverlap, window="hann", scaling="spectrum"
+ )
+
# Plot spectrogram
- im = ax1.pcolormesh(t, f, 10 * np.log10(Sxx + 1e-10),
- shading='gouraud', cmap='viridis',
- vmin=-100, vmax=-20)
- ax1.set_ylabel('Frequency [Hz]', fontsize=10)
- cbar = plt.colorbar(im, ax=ax1, label='dB')
- ax1.set_title('Spectrogram', pad=10, fontsize=12)
-
+ im = ax1.pcolormesh(
+ t,
+ f,
+ 10 * np.log10(Sxx + 1e-10),
+ shading="gouraud",
+ cmap="viridis",
+ vmin=-100,
+ vmax=-20,
+ )
+ ax1.set_ylabel("Frequency [Hz]", fontsize=10)
+ cbar = plt.colorbar(im, ax=ax1, label="dB")
+ ax1.set_title("Spectrogram", pad=10, fontsize=12)
+
# Plot waveform with exact time alignment
times = np.arange(len(audio)) / sr
- ax2.plot(times, audio, color='#2E5596', alpha=0.7, linewidth=0.5, label='Audio')
- ax2.set_ylabel('Amplitude', fontsize=10)
- ax2.set_xlabel('Time [sec]', fontsize=10)
+ ax2.plot(times, audio, color="#2E5596", alpha=0.7, linewidth=0.5, label="Audio")
+ ax2.set_ylabel("Amplitude", fontsize=10)
+ ax2.set_xlabel("Time [sec]", fontsize=10)
ax2.grid(True, alpha=0.2)
-
+
# Add artifact markers
- if 'artifact_locations' in validation_result and validation_result['artifact_locations']:
- for loc in validation_result['artifact_locations']:
- ax1.axvline(x=loc, color='red', alpha=0.7, linewidth=2)
- ax2.axvline(x=loc, color='red', alpha=0.7, linewidth=2, label='Detected Artifacts')
-
+ if (
+ "artifact_locations" in validation_result
+ and validation_result["artifact_locations"]
+ ):
+ for loc in validation_result["artifact_locations"]:
+ ax1.axvline(x=loc, color="red", alpha=0.7, linewidth=2)
+ ax2.axvline(
+ x=loc, color="red", alpha=0.7, linewidth=2, label="Detected Artifacts"
+ )
+
# Add legend to both plots
- if len(validation_result['artifact_locations']) > 0:
- ax1.plot([], [], color='red', linewidth=2, label='Detected Artifacts')
- ax1.legend(loc='upper right', fontsize=8)
+ if len(validation_result["artifact_locations"]) > 0:
+ ax1.plot([], [], color="red", linewidth=2, label="Detected Artifacts")
+ ax1.legend(loc="upper right", fontsize=8)
# Only add unique labels to legend
handles, labels = ax2.get_legend_handles_labels()
unique_labels = dict(zip(labels, handles))
- ax2.legend(unique_labels.values(), unique_labels.keys(),
- loc='upper right', fontsize=8)
-
+ ax2.legend(
+ unique_labels.values(),
+ unique_labels.keys(),
+ loc="upper right",
+ fontsize=8,
+ )
+
# Set common x limits
- xlim = (0, len(audio)/sr)
+ xlim = (0, len(audio) / sr)
ax1.set_xlim(xlim)
ax2.set_xlim(xlim)
og_filename = Path(wav_path).name.split(".")[0]
# Save plot
- plt.savefig(Path(output_dir) / f"{og_filename}_audio_analysis.png", dpi=300, bbox_inches='tight')
+ plt.savefig(
+ Path(output_dir) / f"{og_filename}_audio_analysis.png",
+ dpi=300,
+ bbox_inches="tight",
+ )
plt.close()
-if __name__ == "__main__":
- wav_file = r"C:\Users\jerem\Desktop\Kokoro-FastAPI\examples\output.wav"
- silent=False
+if __name__ == "__main__":
+ wav_file = r"C:\Users\jerem\Desktop\Kokoro-FastAPI\examples\assorted_checks\benchmarks\output_audio\chunk_600_tokens.wav"
+ silent = False
+
+ print(f"\n\n Processing:\n\t{wav_file}")
result = validate_tts(wav_file)
if not silent:
wav_root_dir = Path(wav_file).parent
generate_analysis_plots(wav_file, wav_root_dir, result)
-
+
print(f"\nValidating: {result['file']}")
if "error" in result:
print(f"Error: {result['error']}")
@@ -224,10 +267,10 @@ if __name__ == "__main__":
print(f"RMS Level: {result['rms_level']}")
print(f"DC Offset: {result['dc_offset']}")
print(f"Detected Artifacts: {result['artifact_count']}")
-
+
if result["issues"]:
print("\nIssues Found:")
for issue in result["issues"]:
print(f"- {issue}")
else:
- print("\nNo issues found")
\ No newline at end of file
+ print("\nNo issues found")
diff --git a/examples/assorted_checks/validate_wavs.py b/examples/assorted_checks/validate_wavs.py
index a37c043..ebf114f 100644
--- a/examples/assorted_checks/validate_wavs.py
+++ b/examples/assorted_checks/validate_wavs.py
@@ -1,7 +1,9 @@
import argparse
from pathlib import Path
+
from validate_wav import validate_tts
+
def print_validation_result(result: dict, rel_path: Path):
"""Print full validation details for a single file."""
print(f"\nValidating: {rel_path}")
@@ -13,7 +15,7 @@ def print_validation_result(result: dict, rel_path: Path):
print(f"Peak Amplitude: {result['peak_amplitude']}")
print(f"RMS Level: {result['rms_level']}")
print(f"DC Offset: {result['dc_offset']}")
-
+
if result["issues"]:
print("\nIssues Found:")
for issue in result["issues"]:
@@ -21,25 +23,26 @@ def print_validation_result(result: dict, rel_path: Path):
else:
print("\nNo issues found")
+
def validate_directory(directory: str):
"""Validate all wav files in a directory with detailed output and summary."""
dir_path = Path(directory)
-
+
# Find all wav files (including nested directories)
wav_files = list(dir_path.rglob("*.wav"))
wav_files.extend(dir_path.rglob("*.mp3")) # Also check mp3s
wav_files = sorted(wav_files)
-
+
if not wav_files:
print(f"No .wav or .mp3 files found in {directory}")
return
-
+
print(f"Found {len(wav_files)} files in {directory}")
print("=" * 80)
-
+
# Store results for summary
results = []
-
+
# Detailed validation output
for wav_file in wav_files:
result = validate_tts(str(wav_file))
@@ -47,7 +50,7 @@ def validate_directory(directory: str):
print_validation_result(result, rel_path)
results.append((rel_path, result))
print("=" * 80)
-
+
# Summary with detailed issues
print("\nSUMMARY:")
for rel_path, result in results:
@@ -58,15 +61,18 @@ def validate_directory(directory: str):
issues = result["issues"]
first_issue = issues[0].replace("WARNING: ", "")
if len(issues) > 1:
- print(f"{rel_path}: FAIL - {first_issue} (+{len(issues)-1} more issues)")
+ print(
+ f"{rel_path}: FAIL - {first_issue} (+{len(issues)-1} more issues)"
+ )
else:
print(f"{rel_path}: FAIL - {first_issue}")
else:
print(f"{rel_path}: PASS")
+
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Batch validate TTS wav files")
parser.add_argument("directory", help="Directory containing wav files to validate")
args = parser.parse_args()
-
+
validate_directory(args.directory)
diff --git a/examples/output.wav b/examples/output.wav
deleted file mode 100644
index 7915e5a..0000000
Binary files a/examples/output.wav and /dev/null differ
diff --git a/examples/output_audio_analysis.png b/examples/output_audio_analysis.png
deleted file mode 100644
index 8d0541d..0000000
Binary files a/examples/output_audio_analysis.png and /dev/null differ
diff --git a/examples/speech.mp3 b/examples/speech.mp3
deleted file mode 100644
index c0dc9b0..0000000
Binary files a/examples/speech.mp3 and /dev/null differ
diff --git a/requirements.txt b/requirements.txt
index 284620c..365e005 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ numpy==2.2.1
scipy==1.14.1
# Audio processing
-soundfile==0.12.1
+soundfile==0.13.0
# Text processing
phonemizer==3.3.0