diff --git a/.coveragerc b/.coveragerc index dab8655..c66579a 100644 --- a/.coveragerc +++ b/.coveragerc @@ -6,6 +6,7 @@ omit = Kokoro-82M/* MagicMock/* test_*.py + examples/* [report] exclude_lines = diff --git a/.gitignore b/.gitignore index aebbfa7..f61cc2d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ -output/ +output/* +output_audio/* ui/data/* *.db @@ -16,3 +17,10 @@ env/ .coverage +examples/assorted_checks/benchmarks/output_audio/* +examples/assorted_checks/test_combinations/output/* +examples/assorted_checks/test_openai/output/* + +examples/assorted_checks/test_voices/output/* +examples/assorted_checks/test_formats/output/* +ui/RepoScreenshot.png diff --git a/CHANGELOG.md b/CHANGELOG.md index 4194878..6303af1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,20 @@ Notable changes to this project will be documented in this file. +## 2025-01-04 +### Added +- ONNX Support: + - Added single batch ONNX support for CPU inference + - Roughly 0.4 RTF (2.4x real-time speed) + +### Modified +- Code Refactoring: + - Work on modularizing phonemizer and tokenizer into separate services + - Incorporated these services into a dev endpoint +- Testing and Benchmarking: + - Cleaned up benchmarking scripts + - Cleaned up test scripts + - Added auto-WAV validation scripts ## 2025-01-02 - Audio Format Support: diff --git a/Dockerfile.cpu b/Dockerfile.cpu index 959e555..e9f2d3b 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -10,8 +10,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Install PyTorch CPU version -RUN pip3 install --no-cache-dir torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu +# Install PyTorch CPU version and ONNX runtime +RUN pip3 install --no-cache-dir torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu && \ + pip3 install --no-cache-dir onnxruntime==1.20.1 # Install all other dependencies from requirements.txt COPY requirements.txt . diff --git a/README.md b/README.md index 9732bd6..2e38852 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,8 @@

# Kokoro TTS API -[![Tests](https://img.shields.io/badge/tests-89%20passed-darkgreen)]() -[![Coverage](https://img.shields.io/badge/coverage-80%25-darkgreen)]() +[![Tests](https://img.shields.io/badge/tests-95%20passed-darkgreen)]() +[![Coverage](https://img.shields.io/badge/coverage-72%25-darkgreen)]() [![Tested at Model Commit](https://img.shields.io/badge/last--tested--model--commit-a67f113-blue)](https://huggingface.co/hexgrad/Kokoro-82M/tree/c3b0d86e2a980e027ef71c28819ea02e351c2667) [![Try on Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Try%20on-Spaces-blue)](https://huggingface.co/spaces/Remsky/Kokoro-TTS-Zero) Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model @@ -187,15 +187,13 @@ Key Performance Metrics: GPU Vs. CPU ```bash -# GPU: Requires NVIDIA GPU with CUDA 12.1 support +# GPU: Requires NVIDIA GPU with CUDA 12.1 support (~35x realtime speed) docker compose up --build -# CPU: ~10x slower than GPU inference +# CPU: ONNX optimized inference (~2.4x realtime speed) docker compose -f docker-compose.cpu.yml up --build ``` -*Note: CPU Inference is currently a very basic implementation, and not heavily tested* -
diff --git a/api/src/core/config.py b/api/src/core/config.py index 52aea19..5348730 100644 --- a/api/src/core/config.py +++ b/api/src/core/config.py @@ -14,9 +14,18 @@ class Settings(BaseSettings): output_dir_size_limit_mb: float = 500.0 # Maximum size of output directory in MB default_voice: str = "af" model_dir: str = "/app/Kokoro-82M" # Base directory for model files - model_path: str = "kokoro-v0_19.pth" + pytorch_model_path: str = "kokoro-v0_19.pth" + onnx_model_path: str = "kokoro-v0_19.onnx" voices_dir: str = "voices" sample_rate: int = 24000 + + # ONNX Optimization Settings + onnx_num_threads: int = 4 # Number of threads for intra-op parallelism + onnx_inter_op_threads: int = 4 # Number of threads for inter-op parallelism + onnx_execution_mode: str = "parallel" # parallel or sequential + onnx_optimization_level: str = "all" # all, basic, or disabled + onnx_memory_pattern: bool = True # Enable memory pattern optimization + onnx_arena_extend_strategy: str = "kNextPowerOfTwo" # Memory allocation strategy class Config: env_file = ".env" diff --git a/api/src/core/kokoro.py b/api/src/core/kokoro.py new file mode 100644 index 0000000..e3d6c39 --- /dev/null +++ b/api/src/core/kokoro.py @@ -0,0 +1,185 @@ +import re + +import torch +import phonemizer + + +def split_num(num): + num = num.group() + if "." in num: + return num + elif ":" in num: + h, m = [int(n) for n in num.split(":")] + if m == 0: + return f"{h} o'clock" + elif m < 10: + return f"{h} oh {m}" + return f"{h} {m}" + year = int(num[:4]) + if year < 1100 or year % 1000 < 10: + return num + left, right = num[:2], int(num[2:4]) + s = "s" if num.endswith("s") else "" + if 100 <= year % 1000 <= 999: + if right == 0: + return f"{left} hundred{s}" + elif right < 10: + return f"{left} oh {right}{s}" + return f"{left} {right}{s}" + + +def flip_money(m): + m = m.group() + bill = "dollar" if m[0] == "$" else "pound" + if m[-1].isalpha(): + return f"{m[1:]} {bill}s" + elif "." not in m: + s = "" if m[1:] == "1" else "s" + return f"{m[1:]} {bill}{s}" + b, c = m[1:].split(".") + s = "" if b == "1" else "s" + c = int(c.ljust(2, "0")) + coins = ( + f"cent{'' if c == 1 else 's'}" + if m[0] == "$" + else ("penny" if c == 1 else "pence") + ) + return f"{b} {bill}{s} and {c} {coins}" + + +def point_num(num): + a, b = num.group().split(".") + return " point ".join([a, " ".join(b)]) + + +def normalize_text(text): + text = text.replace(chr(8216), "'").replace(chr(8217), "'") + text = text.replace("«", chr(8220)).replace("»", chr(8221)) + text = text.replace(chr(8220), '"').replace(chr(8221), '"') + text = text.replace("(", "«").replace(")", "»") + for a, b in zip("、。!,:;?", ",.!,:;?"): + text = text.replace(a, b + " ") + text = re.sub(r"[^\S \n]", " ", text) + text = re.sub(r" +", " ", text) + text = re.sub(r"(?<=\n) +(?=\n)", "", text) + text = re.sub(r"\bD[Rr]\.(?= [A-Z])", "Doctor", text) + text = re.sub(r"\b(?:Mr\.|MR\.(?= [A-Z]))", "Mister", text) + text = re.sub(r"\b(?:Ms\.|MS\.(?= [A-Z]))", "Miss", text) + text = re.sub(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))", "Mrs", text) + text = re.sub(r"\betc\.(?! [A-Z])", "etc", text) + text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text) + text = re.sub( + r"\d*\.\d+|\b\d{4}s?\b|(? 510: + tokens = tokens[:510] + print("Truncated to 510 tokens") + ref_s = voicepack[len(tokens)] + out = forward(model, tokens, ref_s, speed) + ps = "".join(next(k for k, v in VOCAB.items() if i == v) for i in tokens) + return out, ps diff --git a/api/src/main.py b/api/src/main.py index ebe2f53..c2a567e 100644 --- a/api/src/main.py +++ b/api/src/main.py @@ -10,8 +10,10 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from .core.config import settings -from .services.tts import TTSModel, TTSService +from .services.tts_model import TTSModel +from .services.tts_service import TTSService from .routers.openai_compatible import router as openai_router +from .routers.text_processing import router as text_router @asynccontextmanager @@ -20,8 +22,8 @@ async def lifespan(app: FastAPI): logger.info("Loading TTS model and voice packs...") # Initialize the main model with warm-up - model, voicepack_count = TTSModel.initialize() - logger.info(f"Model loaded and warmed up on {TTSModel._device}") + voicepack_count = TTSModel.setup() + logger.info(f"Model loaded and warmed up on {TTSModel.get_device()}") logger.info(f"{voicepack_count} voice packs loaded successfully") yield @@ -44,8 +46,9 @@ app.add_middleware( allow_headers=["*"], ) -# Include OpenAI compatible router +# Include routers app.include_router(openai_router, prefix="/v1") +app.include_router(text_router) # Health check endpoint diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py index 4183d39..6663d7b 100644 --- a/api/src/routers/openai_compatible.py +++ b/api/src/routers/openai_compatible.py @@ -3,7 +3,7 @@ from typing import List from loguru import logger from fastapi import Depends, Response, APIRouter, HTTPException -from ..services.tts import TTSService +from ..services.tts_service import TTSService from ..services.audio import AudioService from ..structures.schemas import OpenAISpeechRequest @@ -15,9 +15,7 @@ router = APIRouter( def get_tts_service() -> TTSService: """Dependency to get TTSService instance with database session""" - return TTSService( - start_worker=False - ) # Don't start worker thread for OpenAI endpoint + return TTSService() # Initialize TTSService with default settings @router.post("/audio/speech") diff --git a/api/src/routers/text_processing.py b/api/src/routers/text_processing.py new file mode 100644 index 0000000..9e1ce3a --- /dev/null +++ b/api/src/routers/text_processing.py @@ -0,0 +1,30 @@ +from fastapi import APIRouter +from ..structures.text_schemas import PhonemeRequest, PhonemeResponse +from ..services.text_processing import phonemize, tokenize + +router = APIRouter( + prefix="/text", + tags=["text processing"] +) + +@router.post("/phonemize", response_model=PhonemeResponse) +async def phonemize_text(request: PhonemeRequest) -> PhonemeResponse: + """Convert text to phonemes and tokens: Rough attempt + + Args: + request: Request containing text and language + + Returns: + Phonemes and token IDs + """ + # Get phonemes + phonemes = phonemize(request.text, request.language) + + # Get tokens + tokens = tokenize(phonemes) + tokens = [0] + tokens + [0] # Add start/end tokens + + return PhonemeResponse( + phonemes=phonemes, + tokens=tokens + ) diff --git a/api/src/services/__init__.py b/api/src/services/__init__.py index 46f2e93..82cf76e 100644 --- a/api/src/services/__init__.py +++ b/api/src/services/__init__.py @@ -1,3 +1,3 @@ -from .tts import TTSModel, TTSService +from .tts_service import TTSService -__all__ = ["TTSService", "TTSModel"] +__all__ = ["TTSService"] diff --git a/api/src/services/text_processing/__init__.py b/api/src/services/text_processing/__init__.py new file mode 100644 index 0000000..f945e18 --- /dev/null +++ b/api/src/services/text_processing/__init__.py @@ -0,0 +1,13 @@ +from .normalizer import normalize_text +from .phonemizer import phonemize, PhonemizerBackend, EspeakBackend +from .vocabulary import tokenize, decode_tokens, VOCAB + +__all__ = [ + 'normalize_text', + 'phonemize', + 'tokenize', + 'decode_tokens', + 'VOCAB', + 'PhonemizerBackend', + 'EspeakBackend' +] diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py new file mode 100644 index 0000000..db5b7db --- /dev/null +++ b/api/src/services/text_processing/normalizer.py @@ -0,0 +1,111 @@ +import re + +def split_num(num: re.Match) -> str: + """Handle number splitting for various formats""" + num = num.group() + if "." in num: + return num + elif ":" in num: + h, m = [int(n) for n in num.split(":")] + if m == 0: + return f"{h} o'clock" + elif m < 10: + return f"{h} oh {m}" + return f"{h} {m}" + year = int(num[:4]) + if year < 1100 or year % 1000 < 10: + return num + left, right = num[:2], int(num[2:4]) + s = "s" if num.endswith("s") else "" + if 100 <= year % 1000 <= 999: + if right == 0: + return f"{left} hundred{s}" + elif right < 10: + return f"{left} oh {right}{s}" + return f"{left} {right}{s}" + +def handle_money(m: re.Match) -> str: + """Convert money expressions to spoken form""" + m = m.group() + bill = "dollar" if m[0] == "$" else "pound" + if m[-1].isalpha(): + return f"{m[1:]} {bill}s" + elif "." not in m: + s = "" if m[1:] == "1" else "s" + return f"{m[1:]} {bill}{s}" + b, c = m[1:].split(".") + s = "" if b == "1" else "s" + c = int(c.ljust(2, "0")) + coins = ( + f"cent{'' if c == 1 else 's'}" + if m[0] == "$" + else ("penny" if c == 1 else "pence") + ) + return f"{b} {bill}{s} and {c} {coins}" + +def handle_decimal(num: re.Match) -> str: + """Convert decimal numbers to spoken form""" + a, b = num.group().split(".") + return " point ".join([a, " ".join(b)]) + +def normalize_text(text: str) -> str: + """Normalize text for TTS processing + + Args: + text: Input text to normalize + + Returns: + Normalized text + """ + # Replace quotes and brackets + text = text.replace(chr(8216), "'").replace(chr(8217), "'") + text = text.replace("«", chr(8220)).replace("»", chr(8221)) + text = text.replace(chr(8220), '"').replace(chr(8221), '"') + text = text.replace("(", "«").replace(")", "»") + + # Handle CJK punctuation + for a, b in zip("、。!,:;?", ",.!,:;?"): + text = text.replace(a, b + " ") + + # Clean up whitespace + text = re.sub(r"[^\S \n]", " ", text) + text = re.sub(r" +", " ", text) + text = re.sub(r"(?<=\n) +(?=\n)", "", text) + + # Handle titles and abbreviations + text = re.sub(r"\bD[Rr]\.(?= [A-Z])", "Doctor", text) + text = re.sub(r"\b(?:Mr\.|MR\.(?= [A-Z]))", "Mister", text) + text = re.sub(r"\b(?:Ms\.|MS\.(?= [A-Z]))", "Miss", text) + text = re.sub(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))", "Mrs", text) + text = re.sub(r"\betc\.(?! [A-Z])", "etc", text) + + # Handle common words + text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text) + + # Handle numbers and money + text = re.sub( + r"\d*\.\d+|\b\d{4}s?\b|(? str: + """Convert text to phonemes + + Args: + text: Text to convert to phonemes + + Returns: + Phonemized text + """ + pass + +class EspeakBackend(PhonemizerBackend): + """Espeak-based phonemizer implementation""" + + def __init__(self, language: str): + """Initialize espeak backend + + Args: + language: Language code ('en-us' or 'en-gb') + """ + self.backend = phonemizer.backend.EspeakBackend( + language=language, + preserve_punctuation=True, + with_stress=True + ) + self.language = language + + def phonemize(self, text: str) -> str: + """Convert text to phonemes using espeak + + Args: + text: Text to convert to phonemes + + Returns: + Phonemized text + """ + # Phonemize text + ps = self.backend.phonemize([text]) + ps = ps[0] if ps else "" + + # Handle special cases + ps = ps.replace("kəkˈoːɹoʊ", "kˈoʊkəɹoʊ").replace("kəkˈɔːɹəʊ", "kˈəʊkəɹəʊ") + ps = ps.replace("ʲ", "j").replace("r", "ɹ").replace("x", "k").replace("ɬ", "l") + ps = re.sub(r"(?<=[a-zɹː])(?=hˈʌndɹɪd)", " ", ps) + ps = re.sub(r' z(?=[;:,.!?¡¿—…"«»"" ]|$)', "z", ps) + + # Language-specific rules + if self.language == "en-us": + ps = re.sub(r"(?<=nˈaɪn)ti(?!ː)", "di", ps) + + return ps.strip() + +def create_phonemizer(language: str = "a") -> PhonemizerBackend: + """Factory function to create phonemizer backend + + Args: + language: Language code ('a' for US English, 'b' for British English) + + Returns: + Phonemizer backend instance + """ + # Map language codes to espeak language codes + lang_map = { + "a": "en-us", + "b": "en-gb" + } + + if language not in lang_map: + raise ValueError(f"Unsupported language code: {language}") + + return EspeakBackend(lang_map[language]) + +def phonemize(text: str, language: str = "a", normalize: bool = True) -> str: + """Convert text to phonemes + + Args: + text: Text to convert to phonemes + language: Language code ('a' for US English, 'b' for British English) + normalize: Whether to normalize text before phonemization + + Returns: + Phonemized text + """ + if normalize: + text = normalize_text(text) + + phonemizer = create_phonemizer(language) + return phonemizer.phonemize(text) diff --git a/api/src/services/text_processing/vocabulary.py b/api/src/services/text_processing/vocabulary.py new file mode 100644 index 0000000..66af961 --- /dev/null +++ b/api/src/services/text_processing/vocabulary.py @@ -0,0 +1,37 @@ +def get_vocab(): + """Get the vocabulary dictionary mapping characters to token IDs""" + _pad = "$" + _punctuation = ';:,.!?¡¿—…"«»"" ' + _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + _letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ" + + # Create vocabulary dictionary + symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa) + return {symbol: i for i, symbol in enumerate(symbols)} + +# Initialize vocabulary +VOCAB = get_vocab() + +def tokenize(phonemes: str) -> list[int]: + """Convert phonemes string to token IDs + + Args: + phonemes: String of phonemes to tokenize + + Returns: + List of token IDs + """ + return [i for i in map(VOCAB.get, phonemes) if i is not None] + +def decode_tokens(tokens: list[int]) -> str: + """Convert token IDs back to phonemes string + + Args: + tokens: List of token IDs + + Returns: + String of phonemes + """ + # Create reverse mapping + id_to_symbol = {i: s for s, i in VOCAB.items()} + return "".join(id_to_symbol[t] for t in tokens) diff --git a/api/src/services/tts.py b/api/src/services/tts.py deleted file mode 100644 index c1abd9f..0000000 --- a/api/src/services/tts.py +++ /dev/null @@ -1,286 +0,0 @@ -import io -import os -import re -import time -import threading -from typing import List, Tuple, Optional - -import numpy as np -import torch -import tiktoken -import scipy.io.wavfile as wavfile -from kokoro import generate, tokenize, phonemize, normalize_text -from loguru import logger -from models import build_model - -from ..core.config import settings - -enc = tiktoken.get_encoding("cl100k_base") - - -class TTSModel: - _instance = None - _device = None - _lock = threading.Lock() - - # Directory for all voices (copied base voices, and any created combined voices) - VOICES_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "voices") - - @classmethod - def initialize(cls): - """Initialize and warm up the model""" - with cls._lock: - if cls._instance is None: - # Initialize model - cls._device = "cuda" if torch.cuda.is_available() else "cpu" - logger.info(f"Initializing model on {cls._device}") - model_path = os.path.join(settings.model_dir, settings.model_path) - model = build_model(model_path, cls._device) - cls._instance = model - - # Ensure voices directory exists - os.makedirs(cls.VOICES_DIR, exist_ok=True) - - # Copy base voices to local directory - base_voices_dir = os.path.join(settings.model_dir, settings.voices_dir) - if os.path.exists(base_voices_dir): - for file in os.listdir(base_voices_dir): - if file.endswith(".pt"): - voice_name = file[:-3] - voice_path = os.path.join(cls.VOICES_DIR, file) - if not os.path.exists(voice_path): - try: - logger.info( - f"Copying base voice {voice_name} to voices directory" - ) - base_path = os.path.join(base_voices_dir, file) - voicepack = torch.load( - base_path, - map_location=cls._device, - weights_only=True, - ) - torch.save(voicepack, voice_path) - except Exception as e: - logger.error( - f"Error copying voice {voice_name}: {str(e)}" - ) - - # Warm up with default voice - try: - dummy_text = "Hello" - voice_path = os.path.join(cls.VOICES_DIR, "af.pt") - dummy_voicepack = torch.load( - voice_path, map_location=cls._device, weights_only=True - ) - generate(model, dummy_text, dummy_voicepack, lang="a", speed=1.0) - logger.info("Model warm-up complete") - except Exception as e: - logger.warning(f"Model warm-up failed: {e}") - - # Count voices in directory for validation - voice_count = len( - [f for f in os.listdir(cls.VOICES_DIR) if f.endswith(".pt")] - ) - return cls._instance, voice_count - - @classmethod - def get_instance(cls): - """Get the initialized instance or raise an error""" - if cls._instance is None: - raise RuntimeError("Model not initialized. Call initialize() first.") - return cls._instance, cls._device - - -class TTSService: - def __init__(self, output_dir: str = None, start_worker: bool = False): - self.output_dir = output_dir - self._ensure_voices() - if start_worker: - self.start_worker() - - def _ensure_voices(self): - """Copy base voices to local voices directory during initialization""" - os.makedirs(TTSModel.VOICES_DIR, exist_ok=True) - - base_voices_dir = os.path.join(settings.model_dir, settings.voices_dir) - if os.path.exists(base_voices_dir): - for file in os.listdir(base_voices_dir): - if file.endswith(".pt"): - voice_name = file[:-3] - voice_path = os.path.join(TTSModel.VOICES_DIR, file) - if not os.path.exists(voice_path): - try: - logger.info( - f"Copying base voice {voice_name} to voices directory" - ) - base_path = os.path.join(base_voices_dir, file) - voicepack = torch.load( - base_path, - map_location=TTSModel._device, - weights_only=True, - ) - torch.save(voicepack, voice_path) - except Exception as e: - logger.error(f"Error copying voice {voice_name}: {str(e)}") - - def _split_text(self, text: str) -> List[str]: - """Split text into sentences""" - return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()] - - def _get_voice_path(self, voice_name: str) -> Optional[str]: - """Get the path to a voice file. - - Args: - voice_name: Name of the voice to find - - Returns: - Path to the voice file if found, None otherwise - """ - voice_path = os.path.join(TTSModel.VOICES_DIR, f"{voice_name}.pt") - return voice_path if os.path.exists(voice_path) else None - - def _generate_audio( - self, text: str, voice: str, speed: float, stitch_long_output: bool = True - ) -> Tuple[torch.Tensor, float]: - """Generate audio and measure processing time""" - start_time = time.time() - - try: - # Normalize text once at the start - text = normalize_text(text) - if not text: - raise ValueError("Text is empty after preprocessing") - - # Check voice exists - voice_path = self._get_voice_path(voice) - if not voice_path: - raise ValueError(f"Voice not found: {voice}") - - # Load model and voice - model = TTSModel._instance - voicepack = torch.load( - voice_path, map_location=TTSModel._device, weights_only=True - ) - - # Generate audio with or without stitching - if stitch_long_output: - chunks = self._split_text(text) - audio_chunks = [] - - # Process all chunks with same model/voicepack instance - for i, chunk in enumerate(chunks): - try: - # Validate phonemization first - # ps = phonemize(chunk, voice[0]) - # tokens = tokenize(ps) - # logger.debug( - # f"Processing chunk {i + 1}/{len(chunks)}: {len(tokens)} tokens" - # ) - - # Only proceed if phonemization succeeded - chunk_audio, _ = generate( - model, chunk, voicepack, lang=voice[0], speed=speed - ) - if chunk_audio is not None: - audio_chunks.append(chunk_audio) - else: - logger.error( - f"No audio generated for chunk {i + 1}/{len(chunks)}" - ) - except Exception as e: - logger.error( - f"Failed to generate audio for chunk {i + 1}/{len(chunks)}: '{chunk}'. Error: {str(e)}" - ) - continue - - if not audio_chunks: - raise ValueError("No audio chunks were generated successfully") - - audio = ( - np.concatenate(audio_chunks) - if len(audio_chunks) > 1 - else audio_chunks[0] - ) - else: - audio, _ = generate(model, text, voicepack, lang=voice[0], speed=speed) - - processing_time = time.time() - start_time - return audio, processing_time - - except Exception as e: - print(f"Error in audio generation: {str(e)}") - raise - - def _save_audio(self, audio: torch.Tensor, filepath: str): - """Save audio to file""" - os.makedirs(os.path.dirname(filepath), exist_ok=True) - wavfile.write(filepath, 24000, audio) - - def _audio_to_bytes(self, audio: torch.Tensor) -> bytes: - """Convert audio tensor to WAV bytes""" - buffer = io.BytesIO() - wavfile.write(buffer, 24000, audio) - return buffer.getvalue() - - def combine_voices(self, voices: List[str]) -> str: - """Combine multiple voices into a new voice. - - Args: - voices: List of voice names to combine - - Returns: - Name of the combined voice - - Raises: - ValueError: If less than 2 voices provided or voice loading fails - RuntimeError: If voice combination or saving fails - """ - if len(voices) < 2: - raise ValueError("At least 2 voices are required for combination") - - # Load voices - t_voices: List[torch.Tensor] = [] - v_name: List[str] = [] - - for voice in voices: - try: - voice_path = os.path.join(TTSModel.VOICES_DIR, f"{voice}.pt") - voicepack = torch.load( - voice_path, map_location=TTSModel._device, weights_only=True - ) - t_voices.append(voicepack) - v_name.append(voice) - except Exception as e: - raise ValueError(f"Failed to load voice {voice}: {str(e)}") - - # Combine voices - try: - f: str = "_".join(v_name) - v = torch.mean(torch.stack(t_voices), dim=0) - combined_path = os.path.join(TTSModel.VOICES_DIR, f"{f}.pt") - - # Save combined voice - try: - torch.save(v, combined_path) - except Exception as e: - raise RuntimeError( - f"Failed to save combined voice to {combined_path}: {str(e)}" - ) - - return f - - except Exception as e: - if not isinstance(e, (ValueError, RuntimeError)): - raise RuntimeError(f"Error combining voices: {str(e)}") - raise - - def list_voices(self) -> List[str]: - """List all available voices""" - voices = [] - try: - for file in os.listdir(TTSModel.VOICES_DIR): - if file.endswith(".pt"): - voices.append(file[:-3]) # Remove .pt extension - except Exception as e: - logger.error(f"Error listing voices: {str(e)}") - return sorted(voices) diff --git a/api/src/services/tts_base.py b/api/src/services/tts_base.py new file mode 100644 index 0000000..f502373 --- /dev/null +++ b/api/src/services/tts_base.py @@ -0,0 +1,136 @@ +import os +import threading +from abc import ABC, abstractmethod +from typing import List, Tuple +import torch +import numpy as np +from loguru import logger + +from ..core.config import settings + +class TTSBaseModel(ABC): + _instance = None + _lock = threading.Lock() + _device = None + VOICES_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "voices") + + @classmethod + def setup(cls): + """Initialize model and setup voices""" + with cls._lock: + # Set device + cuda_available = torch.cuda.is_available() + logger.info(f"CUDA available: {cuda_available}") + if cuda_available: + try: + # Test CUDA device + test_tensor = torch.zeros(1).cuda() + logger.info("CUDA test successful") + model_path = os.path.join(settings.model_dir, settings.pytorch_model_path) + cls._device = "cuda" + except Exception as e: + logger.error(f"CUDA test failed: {e}") + cls._device = "cpu" + else: + cls._device = "cpu" + model_path = os.path.join(settings.model_dir, settings.onnx_model_path) + logger.info(f"Initializing model on {cls._device}") + + # Initialize model + if not cls.initialize(settings.model_dir, model_path=model_path): + raise RuntimeError(f"Failed to initialize {cls._device.upper()} model") + + # Setup voices directory + os.makedirs(cls.VOICES_DIR, exist_ok=True) + + # Copy base voices to local directory + base_voices_dir = os.path.join(settings.model_dir, settings.voices_dir) + if os.path.exists(base_voices_dir): + for file in os.listdir(base_voices_dir): + if file.endswith(".pt"): + voice_name = file[:-3] + voice_path = os.path.join(cls.VOICES_DIR, file) + if not os.path.exists(voice_path): + try: + logger.info(f"Copying base voice {voice_name} to voices directory") + base_path = os.path.join(base_voices_dir, file) + voicepack = torch.load(base_path, map_location=cls._device, weights_only=True) + torch.save(voicepack, voice_path) + except Exception as e: + logger.error(f"Error copying voice {voice_name}: {str(e)}") + + # Warm up with default voice + try: + dummy_text = "Hello" + voice_path = os.path.join(cls.VOICES_DIR, "af.pt") + dummy_voicepack = torch.load(voice_path, map_location=cls._device, weights_only=True) + + # Process text and generate audio + phonemes, tokens = cls.process_text(dummy_text, "a") + cls.generate_from_tokens(tokens, dummy_voicepack, 1.0) + + logger.info("Model warm-up complete") + except Exception as e: + logger.warning(f"Model warm-up failed: {e}") + + # Count voices in directory + voice_count = len([f for f in os.listdir(cls.VOICES_DIR) if f.endswith(".pt")]) + return voice_count + + @classmethod + @abstractmethod + def initialize(cls, model_dir: str, model_path: str = None): + """Initialize the model""" + pass + + @classmethod + @abstractmethod + def process_text(cls, text: str, language: str) -> Tuple[str, List[int]]: + """Process text into phonemes and tokens + + Args: + text: Input text + language: Language code + + Returns: + tuple[str, list[int]]: Phonemes and token IDs + """ + pass + + @classmethod + @abstractmethod + def generate_from_text(cls, text: str, voicepack: torch.Tensor, language: str, speed: float) -> Tuple[np.ndarray, str]: + """Generate audio from text + + Args: + text: Input text + voicepack: Voice tensor + language: Language code + speed: Speed factor + + Returns: + tuple[np.ndarray, str]: Generated audio samples and phonemes + """ + pass + + @classmethod + @abstractmethod + def generate_from_tokens(cls, tokens: List[int], voicepack: torch.Tensor, speed: float) -> np.ndarray: + """Generate audio from tokens + + Args: + tokens: Token IDs + voicepack: Voice tensor + speed: Speed factor + + Returns: + np.ndarray: Generated audio samples + """ + pass + + @classmethod + def get_device(cls): + """Get the current device""" + if cls._device is None: + raise RuntimeError("Model not initialized. Call setup() first.") + return cls._device diff --git a/api/src/services/tts_cpu.py b/api/src/services/tts_cpu.py new file mode 100644 index 0000000..0436a24 --- /dev/null +++ b/api/src/services/tts_cpu.py @@ -0,0 +1,144 @@ +import os +import numpy as np +import torch +from onnxruntime import InferenceSession, SessionOptions, GraphOptimizationLevel, ExecutionMode +from loguru import logger + +from .tts_base import TTSBaseModel +from .text_processing import phonemize, tokenize +from ..core.config import settings + +class TTSCPUModel(TTSBaseModel): + _instance = None + _onnx_session = None + + @classmethod + def initialize(cls, model_dir: str, model_path: str = None): + """Initialize ONNX model for CPU inference""" + if cls._onnx_session is None: + # Try loading ONNX model + onnx_path = os.path.join(model_dir, settings.onnx_model_path) + if os.path.exists(onnx_path): + logger.info(f"Loading ONNX model from {onnx_path}") + else: + logger.error(f"ONNX model not found at {onnx_path}") + return None + + if not onnx_path: + return None + + logger.info(f"Loading ONNX model from {onnx_path}") + + # Configure ONNX session for optimal performance + session_options = SessionOptions() + + # Set optimization level + if settings.onnx_optimization_level == "all": + session_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL + elif settings.onnx_optimization_level == "basic": + session_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC + else: + session_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL + + # Configure threading + session_options.intra_op_num_threads = settings.onnx_num_threads + session_options.inter_op_num_threads = settings.onnx_inter_op_threads + + # Set execution mode + session_options.execution_mode = ( + ExecutionMode.ORT_PARALLEL + if settings.onnx_execution_mode == "parallel" + else ExecutionMode.ORT_SEQUENTIAL + ) + + # Enable/disable memory pattern optimization + session_options.enable_mem_pattern = settings.onnx_memory_pattern + + # Configure CPU provider options + provider_options = { + 'CPUExecutionProvider': { + 'arena_extend_strategy': settings.onnx_arena_extend_strategy, + 'cpu_memory_arena_cfg': 'cpu:0' + } + } + + cls._onnx_session = InferenceSession( + onnx_path, + sess_options=session_options, + providers=['CPUExecutionProvider'], + provider_options=[provider_options] + ) + + return cls._onnx_session + return cls._onnx_session + + @classmethod + def process_text(cls, text: str, language: str) -> tuple[str, list[int]]: + """Process text into phonemes and tokens + + Args: + text: Input text + language: Language code + + Returns: + tuple[str, list[int]]: Phonemes and token IDs + """ + phonemes = phonemize(text, language) + tokens = tokenize(phonemes) + tokens = [0] + tokens + [0] # Add start/end tokens + return phonemes, tokens + + @classmethod + def generate_from_text(cls, text: str, voicepack: torch.Tensor, language: str, speed: float) -> tuple[np.ndarray, str]: + """Generate audio from text + + Args: + text: Input text + voicepack: Voice tensor + language: Language code + speed: Speed factor + + Returns: + tuple[np.ndarray, str]: Generated audio samples and phonemes + """ + if cls._onnx_session is None: + raise RuntimeError("ONNX model not initialized") + + # Process text + phonemes, tokens = cls.process_text(text, language) + + # Generate audio + audio = cls.generate_from_tokens(tokens, voicepack, speed) + + return audio, phonemes + + @classmethod + def generate_from_tokens(cls, tokens: list[int], voicepack: torch.Tensor, speed: float) -> np.ndarray: + """Generate audio from tokens + + Args: + tokens: Token IDs + voicepack: Voice tensor + speed: Speed factor + + Returns: + np.ndarray: Generated audio samples + """ + if cls._onnx_session is None: + raise RuntimeError("ONNX model not initialized") + + # Pre-allocate and prepare inputs + tokens_input = np.array([tokens], dtype=np.int64) + style_input = voicepack[len(tokens)-2].numpy() # Already has correct dimensions + speed_input = np.full(1, speed, dtype=np.float32) # More efficient than ones * speed + + # Run inference with optimized inputs + result = cls._onnx_session.run( + None, + { + 'tokens': tokens_input, + 'style': style_input, + 'speed': speed_input + } + ) + return result[0] diff --git a/api/src/services/tts_gpu.py b/api/src/services/tts_gpu.py new file mode 100644 index 0000000..300d141 --- /dev/null +++ b/api/src/services/tts_gpu.py @@ -0,0 +1,127 @@ +import os +import numpy as np +import torch +from loguru import logger +from models import build_model +from .text_processing import phonemize, tokenize + +from .tts_base import TTSBaseModel +from ..core.config import settings + +@torch.no_grad() +def forward(model, tokens, ref_s, speed): + """Forward pass through the model""" + device = ref_s.device + tokens = torch.LongTensor([[0, *tokens, 0]]).to(device) + input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device) + text_mask = length_to_mask(input_lengths).to(device) + bert_dur = model.bert(tokens, attention_mask=(~text_mask).int()) + d_en = model.bert_encoder(bert_dur).transpose(-1, -2) + s = ref_s[:, 128:] + d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask) + x, _ = model.predictor.lstm(d) + duration = model.predictor.duration_proj(x) + duration = torch.sigmoid(duration).sum(axis=-1) / speed + pred_dur = torch.round(duration).clamp(min=1).long() + pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item()) + c_frame = 0 + for i in range(pred_aln_trg.size(0)): + pred_aln_trg[i, c_frame : c_frame + pred_dur[0, i].item()] = 1 + c_frame += pred_dur[0, i].item() + en = d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device) + F0_pred, N_pred = model.predictor.F0Ntrain(en, s) + t_en = model.text_encoder(tokens, input_lengths, text_mask) + asr = t_en @ pred_aln_trg.unsqueeze(0).to(device) + return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy() + +def length_to_mask(lengths): + """Create attention mask from lengths""" + mask = ( + torch.arange(lengths.max()) + .unsqueeze(0) + .expand(lengths.shape[0], -1) + .type_as(lengths) + ) + mask = torch.gt(mask + 1, lengths.unsqueeze(1)) + return mask + +class TTSGPUModel(TTSBaseModel): + _instance = None + _device = "cuda" + + @classmethod + def initialize(cls, model_dir: str, model_path: str): + """Initialize PyTorch model for GPU inference""" + if cls._instance is None and torch.cuda.is_available(): + try: + logger.info("Initializing GPU model") + model_path = os.path.join(model_dir, settings.pytorch_model_path) + model = build_model(model_path, cls._device) + cls._instance = model + return cls._instance + except Exception as e: + logger.error(f"Failed to initialize GPU model: {e}") + return None + return cls._instance + + @classmethod + def process_text(cls, text: str, language: str) -> tuple[str, list[int]]: + """Process text into phonemes and tokens + + Args: + text: Input text + language: Language code + + Returns: + tuple[str, list[int]]: Phonemes and token IDs + """ + phonemes = phonemize(text, language) + tokens = tokenize(phonemes) + return phonemes, tokens + + @classmethod + def generate_from_text(cls, text: str, voicepack: torch.Tensor, language: str, speed: float) -> tuple[np.ndarray, str]: + """Generate audio from text + + Args: + text: Input text + voicepack: Voice tensor + language: Language code + speed: Speed factor + + Returns: + tuple[np.ndarray, str]: Generated audio samples and phonemes + """ + if cls._instance is None: + raise RuntimeError("GPU model not initialized") + + # Process text + phonemes, tokens = cls.process_text(text, language) + + # Generate audio + audio = cls.generate_from_tokens(tokens, voicepack, speed) + + return audio, phonemes + + @classmethod + def generate_from_tokens(cls, tokens: list[int], voicepack: torch.Tensor, speed: float) -> np.ndarray: + """Generate audio from tokens + + Args: + tokens: Token IDs + voicepack: Voice tensor + speed: Speed factor + + Returns: + np.ndarray: Generated audio samples + """ + if cls._instance is None: + raise RuntimeError("GPU model not initialized") + + # Get reference style + ref_s = voicepack[len(tokens)] + + # Generate audio + audio = forward(cls._instance, tokens, ref_s, speed) + + return audio diff --git a/api/src/services/tts_model.py b/api/src/services/tts_model.py new file mode 100644 index 0000000..1e04939 --- /dev/null +++ b/api/src/services/tts_model.py @@ -0,0 +1,8 @@ +import torch + +if torch.cuda.is_available(): + from .tts_gpu import TTSGPUModel as TTSModel +else: + from .tts_cpu import TTSCPUModel as TTSModel + +__all__ = ["TTSModel"] diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py new file mode 100644 index 0000000..6d763fe --- /dev/null +++ b/api/src/services/tts_service.py @@ -0,0 +1,161 @@ +import io +import os +import re +import time +from typing import List, Tuple, Optional + +import numpy as np +import torch +import scipy.io.wavfile as wavfile +from .text_processing import normalize_text +from loguru import logger + +from ..core.config import settings +from .tts_model import TTSModel + + +class TTSService: + def __init__(self, output_dir: str = None): + self.output_dir = output_dir + + def _split_text(self, text: str) -> List[str]: + """Split text into sentences""" + if not isinstance(text, str): + text = str(text) if text is not None else "" + return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()] + + def _get_voice_path(self, voice_name: str) -> Optional[str]: + """Get the path to a voice file""" + voice_path = os.path.join(TTSModel.VOICES_DIR, f"{voice_name}.pt") + return voice_path if os.path.exists(voice_path) else None + + def _generate_audio( + self, text: str, voice: str, speed: float, stitch_long_output: bool = True + ) -> Tuple[torch.Tensor, float]: + """Generate audio and measure processing time""" + start_time = time.time() + + try: + # Normalize text once at the start + if not text: + raise ValueError("Text is empty after preprocessing") + normalized = normalize_text(text) + if not normalized: + raise ValueError("Text is empty after preprocessing") + text = str(normalized) + + # Check voice exists + voice_path = self._get_voice_path(voice) + if not voice_path: + raise ValueError(f"Voice not found: {voice}") + + # Load voice + voicepack = torch.load( + voice_path, map_location=TTSModel.get_device(), weights_only=True + ) + + # Generate audio with or without stitching + if stitch_long_output: + chunks = self._split_text(text) + audio_chunks = [] + + # Process all chunks + for i, chunk in enumerate(chunks): + try: + # Process text and generate audio + phonemes, tokens = TTSModel.process_text(chunk, voice[0]) + chunk_audio = TTSModel.generate_from_tokens(tokens, voicepack, speed) + + if chunk_audio is not None: + audio_chunks.append(chunk_audio) + else: + logger.error(f"No audio generated for chunk {i + 1}/{len(chunks)}") + + except Exception as e: + logger.error( + f"Failed to generate audio for chunk {i + 1}/{len(chunks)}: '{chunk}'. Error: {str(e)}" + ) + continue + + if not audio_chunks: + raise ValueError("No audio chunks were generated successfully") + + audio = ( + np.concatenate(audio_chunks) + if len(audio_chunks) > 1 + else audio_chunks[0] + ) + else: + # Process single chunk + phonemes, tokens = TTSModel.process_text(text, voice[0]) + audio = TTSModel.generate_from_tokens(tokens, voicepack, speed) + + processing_time = time.time() - start_time + return audio, processing_time + + except Exception as e: + logger.error(f"Error in audio generation: {str(e)}") + raise + + def _save_audio(self, audio: torch.Tensor, filepath: str): + """Save audio to file""" + os.makedirs(os.path.dirname(filepath), exist_ok=True) + wavfile.write(filepath, 24000, audio) + + def _audio_to_bytes(self, audio: torch.Tensor) -> bytes: + """Convert audio tensor to WAV bytes""" + buffer = io.BytesIO() + wavfile.write(buffer, 24000, audio) + return buffer.getvalue() + + def combine_voices(self, voices: List[str]) -> str: + """Combine multiple voices into a new voice""" + if len(voices) < 2: + raise ValueError("At least 2 voices are required for combination") + + # Load voices + t_voices: List[torch.Tensor] = [] + v_name: List[str] = [] + + for voice in voices: + try: + voice_path = os.path.join(TTSModel.VOICES_DIR, f"{voice}.pt") + voicepack = torch.load( + voice_path, map_location=TTSModel.get_device(), weights_only=True + ) + t_voices.append(voicepack) + v_name.append(voice) + except Exception as e: + raise ValueError(f"Failed to load voice {voice}: {str(e)}") + + # Combine voices + try: + f: str = "_".join(v_name) + v = torch.mean(torch.stack(t_voices), dim=0) + combined_path = os.path.join(TTSModel.VOICES_DIR, f"{f}.pt") + + # Save combined voice + try: + torch.save(v, combined_path) + except Exception as e: + raise RuntimeError( + f"Failed to save combined voice to {combined_path}: {str(e)}" + ) + + return f + + except Exception as e: + if not isinstance(e, (ValueError, RuntimeError)): + raise RuntimeError(f"Error combining voices: {str(e)}") + raise + + def list_voices(self) -> List[str]: + """List all available voices""" + voices = [] + try: + for file in os.listdir(TTSModel.VOICES_DIR): + if file.endswith(".pt"): + voices.append(file[:-3]) # Remove .pt extension + except Exception as e: + logger.error(f"Error listing voices: {str(e)}") + return sorted(voices) diff --git a/api/src/structures/text_schemas.py b/api/src/structures/text_schemas.py new file mode 100644 index 0000000..5ae1b08 --- /dev/null +++ b/api/src/structures/text_schemas.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel + +class PhonemeRequest(BaseModel): + text: str + language: str = "a" # Default to American English + +class PhonemeResponse(BaseModel): + phonemes: str + tokens: list[int] diff --git a/api/tests/conftest.py b/api/tests/conftest.py index c41172f..fba270b 100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -21,8 +21,73 @@ def cleanup(): cleanup_mock_dirs() -# Mock torch and other ML modules before they're imported -sys.modules["torch"] = Mock() +# Create mock torch module +mock_torch = Mock() +mock_torch.cuda = Mock() +mock_torch.cuda.is_available = Mock(return_value=False) + +# Create a mock tensor class that supports basic operations +class MockTensor: + def __init__(self, data): + self.data = data + if isinstance(data, (list, tuple)): + self.shape = [len(data)] + elif isinstance(data, MockTensor): + self.shape = data.shape + else: + self.shape = getattr(data, 'shape', [1]) + + def __getitem__(self, idx): + if isinstance(self.data, (list, tuple)): + if isinstance(idx, slice): + return MockTensor(self.data[idx]) + return self.data[idx] + return self + + def max(self): + if isinstance(self.data, (list, tuple)): + max_val = max(self.data) + return MockTensor(max_val) + return 5 # Default for testing + + def item(self): + if isinstance(self.data, (list, tuple)): + return max(self.data) + if isinstance(self.data, (int, float)): + return self.data + return 5 # Default for testing + + def cuda(self): + """Support cuda conversion""" + return self + + def any(self): + if isinstance(self.data, (list, tuple)): + return any(self.data) + return False + + def all(self): + if isinstance(self.data, (list, tuple)): + return all(self.data) + return True + + def unsqueeze(self, dim): + return self + + def expand(self, *args): + return self + + def type_as(self, other): + return self + +# Add tensor operations to mock torch +mock_torch.tensor = lambda x: MockTensor(x) +mock_torch.zeros = lambda *args: MockTensor([0] * (args[0] if isinstance(args[0], int) else args[0][0])) +mock_torch.arange = lambda x: MockTensor(list(range(x))) +mock_torch.gt = lambda x, y: MockTensor([False] * x.shape[0]) + +# Mock modules before they're imported +sys.modules["torch"] = mock_torch sys.modules["transformers"] = Mock() sys.modules["phonemizer"] = Mock() sys.modules["models"] = Mock() @@ -31,14 +96,22 @@ sys.modules["kokoro"] = Mock() sys.modules["kokoro.generate"] = Mock() sys.modules["kokoro.phonemize"] = Mock() sys.modules["kokoro.tokenize"] = Mock() +sys.modules["onnxruntime"] = Mock() @pytest.fixture(autouse=True) def mock_tts_model(): - """Mock TTSModel to avoid loading real models during tests""" - with patch("api.src.services.tts.TTSModel") as mock: + """Mock TTSModel and TTS model initialization""" + with patch("api.src.services.tts_model.TTSModel") as mock_tts_model, \ + patch("api.src.services.tts_base.TTSBaseModel") as mock_base_model: + + # Mock TTSModel model_instance = Mock() model_instance.get_instance.return_value = model_instance model_instance.get_voicepack.return_value = None - mock.get_instance.return_value = model_instance + mock_tts_model.get_instance.return_value = model_instance + + # Mock TTS model initialization + mock_base_model.setup.return_value = 1 # Return dummy voice count + yield model_instance diff --git a/api/tests/test_main.py b/api/tests/test_main.py index 5b23749..c6a972e 100644 --- a/api/tests/test_main.py +++ b/api/tests/test_main.py @@ -26,13 +26,11 @@ def test_health_check(test_client): @patch("api.src.main.logger") async def test_lifespan_successful_warmup(mock_logger, mock_tts_model): """Test successful model warmup in lifespan""" - # Mock the model initialization with model info and voicepack count - mock_model = MagicMock() # Mock file system for voice counting mock_tts_model.VOICES_DIR = "/mock/voices" with patch("os.listdir", return_value=["voice1.pt", "voice2.pt", "voice3.pt"]): - mock_tts_model.initialize.return_value = (mock_model, 3) # 3 voice files - mock_tts_model._device = "cuda" # Set device class variable + mock_tts_model.setup.return_value = 3 # 3 voice files + mock_tts_model.get_device.return_value = "cuda" # Create an async generator from the lifespan context manager async_gen = lifespan(MagicMock()) @@ -44,8 +42,8 @@ async def test_lifespan_successful_warmup(mock_logger, mock_tts_model): mock_logger.info.assert_any_call("Model loaded and warmed up on cuda") mock_logger.info.assert_any_call("3 voice packs loaded successfully") - # Verify model initialization was called - mock_tts_model.initialize.assert_called_once() + # Verify model setup was called + mock_tts_model.setup.assert_called_once() # Clean up await async_gen.__aexit__(None, None, None) @@ -56,14 +54,14 @@ async def test_lifespan_successful_warmup(mock_logger, mock_tts_model): @patch("api.src.main.logger") async def test_lifespan_failed_warmup(mock_logger, mock_tts_model): """Test failed model warmup in lifespan""" - # Mock the model initialization to fail - mock_tts_model.initialize.side_effect = Exception("Failed to initialize model") + # Mock the model setup to fail + mock_tts_model.setup.side_effect = RuntimeError("Failed to initialize model") # Create an async generator from the lifespan context manager async_gen = lifespan(MagicMock()) # Verify the exception is raised - with pytest.raises(Exception, match="Failed to initialize model"): + with pytest.raises(RuntimeError, match="Failed to initialize model"): await async_gen.__aenter__() # Verify the expected logging sequence @@ -77,20 +75,18 @@ async def test_lifespan_failed_warmup(mock_logger, mock_tts_model): @patch("api.src.main.TTSModel") async def test_lifespan_cuda_warmup(mock_tts_model): """Test model warmup specifically on CUDA""" - # Mock the model initialization with CUDA and voicepacks - mock_model = MagicMock() # Mock file system for voice counting mock_tts_model.VOICES_DIR = "/mock/voices" with patch("os.listdir", return_value=["voice1.pt", "voice2.pt"]): - mock_tts_model.initialize.return_value = (mock_model, 2) # 2 voice files - mock_tts_model._device = "cuda" # Set device class variable + mock_tts_model.setup.return_value = 2 # 2 voice files + mock_tts_model.get_device.return_value = "cuda" # Create an async generator from the lifespan context manager async_gen = lifespan(MagicMock()) await async_gen.__aenter__() - # Verify model was initialized - mock_tts_model.initialize.assert_called_once() + # Verify model setup was called + mock_tts_model.setup.assert_called_once() # Clean up await async_gen.__aexit__(None, None, None) @@ -100,22 +96,20 @@ async def test_lifespan_cuda_warmup(mock_tts_model): @patch("api.src.main.TTSModel") async def test_lifespan_cpu_fallback(mock_tts_model): """Test model warmup falling back to CPU""" - # Mock the model initialization with CPU and voicepacks - mock_model = MagicMock() # Mock file system for voice counting mock_tts_model.VOICES_DIR = "/mock/voices" with patch( "os.listdir", return_value=["voice1.pt", "voice2.pt", "voice3.pt", "voice4.pt"] ): - mock_tts_model.initialize.return_value = (mock_model, 4) # 4 voice files - mock_tts_model._device = "cpu" # Set device class variable + mock_tts_model.setup.return_value = 4 # 4 voice files + mock_tts_model.get_device.return_value = "cpu" # Create an async generator from the lifespan context manager async_gen = lifespan(MagicMock()) await async_gen.__aenter__() - # Verify model was initialized - mock_tts_model.initialize.assert_called_once() + # Verify model setup was called + mock_tts_model.setup.assert_called_once() # Clean up await async_gen.__aexit__(None, None, None) diff --git a/api/tests/test_tts_implementations.py b/api/tests/test_tts_implementations.py new file mode 100644 index 0000000..3f10c17 --- /dev/null +++ b/api/tests/test_tts_implementations.py @@ -0,0 +1,144 @@ +"""Tests for TTS model implementations""" +import os +import torch +import pytest +import numpy as np +from unittest.mock import patch, MagicMock + +from api.src.services.tts_base import TTSBaseModel +from api.src.services.tts_cpu import TTSCPUModel +from api.src.services.tts_gpu import TTSGPUModel, length_to_mask + +# Base Model Tests +def test_get_device_error(): + """Test get_device() raises error when not initialized""" + TTSBaseModel._device = None + with pytest.raises(RuntimeError, match="Model not initialized"): + TTSBaseModel.get_device() + +@patch('torch.cuda.is_available') +@patch('os.path.exists') +@patch('os.path.join') +@patch('os.listdir') +@patch('torch.load') +@patch('torch.save') +def test_setup_cuda_available(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available): + """Test setup with CUDA available""" + TTSBaseModel._device = None + mock_cuda_available.return_value = True + mock_exists.return_value = True + mock_load.return_value = torch.zeros(1) + mock_listdir.return_value = ["voice1.pt", "voice2.pt"] + mock_join.return_value = "/mocked/path" + + # Mock the abstract methods + TTSBaseModel.initialize = MagicMock(return_value=True) + TTSBaseModel.process_text = MagicMock(return_value=("dummy", [1,2,3])) + TTSBaseModel.generate_from_tokens = MagicMock(return_value=np.zeros(1000)) + + voice_count = TTSBaseModel.setup() + assert TTSBaseModel._device == "cuda" + assert voice_count == 2 + +@patch('torch.cuda.is_available') +@patch('os.path.exists') +@patch('os.path.join') +@patch('os.listdir') +@patch('torch.load') +@patch('torch.save') +def test_setup_cuda_unavailable(mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available): + """Test setup with CUDA unavailable""" + TTSBaseModel._device = None + mock_cuda_available.return_value = False + mock_exists.return_value = True + mock_load.return_value = torch.zeros(1) + mock_listdir.return_value = ["voice1.pt", "voice2.pt"] + mock_join.return_value = "/mocked/path" + + # Mock the abstract methods + TTSBaseModel.initialize = MagicMock(return_value=True) + TTSBaseModel.process_text = MagicMock(return_value=("dummy", [1,2,3])) + TTSBaseModel.generate_from_tokens = MagicMock(return_value=np.zeros(1000)) + + voice_count = TTSBaseModel.setup() + assert TTSBaseModel._device == "cpu" + assert voice_count == 2 + +# CPU Model Tests +def test_cpu_initialize_missing_model(): + """Test CPU initialize with missing model""" + with patch('os.path.exists', return_value=False): + result = TTSCPUModel.initialize("dummy_dir") + assert result is None + +def test_cpu_generate_uninitialized(): + """Test CPU generate methods with uninitialized model""" + TTSCPUModel._onnx_session = None + + with pytest.raises(RuntimeError, match="ONNX model not initialized"): + TTSCPUModel.generate_from_text("test", torch.zeros(1), "en", 1.0) + + with pytest.raises(RuntimeError, match="ONNX model not initialized"): + TTSCPUModel.generate_from_tokens([1,2,3], torch.zeros(1), 1.0) + +def test_cpu_process_text(): + """Test CPU process_text functionality""" + with patch('api.src.services.tts_cpu.phonemize') as mock_phonemize, \ + patch('api.src.services.tts_cpu.tokenize') as mock_tokenize: + + mock_phonemize.return_value = "test phonemes" + mock_tokenize.return_value = [1, 2, 3] + + phonemes, tokens = TTSCPUModel.process_text("test", "en") + assert phonemes == "test phonemes" + assert tokens == [0, 1, 2, 3, 0] # Should add start/end tokens + +# GPU Model Tests +@patch('torch.cuda.is_available') +def test_gpu_initialize_cuda_unavailable(mock_cuda_available): + """Test GPU initialize with CUDA unavailable""" + mock_cuda_available.return_value = False + TTSGPUModel._instance = None + + result = TTSGPUModel.initialize("dummy_dir", "dummy_path") + assert result is None + +@patch('api.src.services.tts_gpu.length_to_mask') +def test_gpu_length_to_mask(mock_length_to_mask): + """Test length_to_mask function""" + # Setup mock return value + expected_mask = torch.tensor([ + [False, False, False, True, True], + [False, False, False, False, False] + ]) + mock_length_to_mask.return_value = expected_mask + + # Call function with test input + lengths = torch.tensor([3, 5]) + mask = mock_length_to_mask(lengths) + + # Verify mock was called with correct input + mock_length_to_mask.assert_called_once() + assert torch.equal(mask, expected_mask) + +def test_gpu_generate_uninitialized(): + """Test GPU generate methods with uninitialized model""" + TTSGPUModel._instance = None + + with pytest.raises(RuntimeError, match="GPU model not initialized"): + TTSGPUModel.generate_from_text("test", torch.zeros(1), "en", 1.0) + + with pytest.raises(RuntimeError, match="GPU model not initialized"): + TTSGPUModel.generate_from_tokens([1,2,3], torch.zeros(1), 1.0) + +def test_gpu_process_text(): + """Test GPU process_text functionality""" + with patch('api.src.services.tts_gpu.phonemize') as mock_phonemize, \ + patch('api.src.services.tts_gpu.tokenize') as mock_tokenize: + + mock_phonemize.return_value = "test phonemes" + mock_tokenize.return_value = [1, 2, 3] + + phonemes, tokens = TTSGPUModel.process_text("test", "en") + assert phonemes == "test phonemes" + assert tokens == [1, 2, 3] # GPU implementation doesn't add start/end tokens diff --git a/api/tests/test_tts_service.py b/api/tests/test_tts_service.py index d2a138b..4e63ff1 100644 --- a/api/tests/test_tts_service.py +++ b/api/tests/test_tts_service.py @@ -6,14 +6,19 @@ from unittest.mock import MagicMock, call, patch import numpy as np import torch import pytest +from onnxruntime import InferenceSession -from api.src.services.tts import TTSModel, TTSService +from api.src.core.config import settings +from api.src.services.tts_model import TTSModel +from api.src.services.tts_service import TTSService +from api.src.services.tts_cpu import TTSCPUModel +from api.src.services.tts_gpu import TTSGPUModel @pytest.fixture def tts_service(): """Create a TTSService instance for testing""" - return TTSService(start_worker=False) + return TTSService() @pytest.fixture @@ -68,80 +73,143 @@ def test_list_voices(mock_join, mock_listdir, tts_service): assert "not_a_voice" not in voices -@patch("api.src.services.tts.TTSModel.get_instance") -@patch("api.src.services.tts.TTSModel.get_voicepack") -@patch("api.src.services.tts.normalize_text") -@patch("api.src.services.tts.phonemize") -@patch("api.src.services.tts.tokenize") -@patch("api.src.services.tts.generate") -def test_generate_audio_empty_text( - mock_generate, - mock_tokenize, - mock_phonemize, - mock_normalize, - mock_voicepack, - mock_instance, - tts_service, -): - """Test generating audio with empty text""" - mock_normalize.return_value = "" +@patch("os.listdir") +def test_list_voices_error(mock_listdir, tts_service): + """Test error handling in list_voices""" + mock_listdir.side_effect = Exception("Failed to list directory") + voices = tts_service.list_voices() + assert voices == [] + + +def mock_model_setup(cuda_available=False): + """Helper function to mock model setup""" + # Reset model state + TTSModel._instance = None + TTSModel._device = None + TTSModel._voicepacks = {} + + # Create mock model instance with proper generate method + mock_model = MagicMock() + mock_model.generate.return_value = np.zeros(24000, dtype=np.float32) + TTSModel._instance = mock_model + + # Set device based on CUDA availability + TTSModel._device = "cuda" if cuda_available else "cpu" + + return 3 # Return voice count (including af.pt) + + +def test_model_initialization_cuda(): + """Test model initialization with CUDA""" + # Simulate CUDA availability + voice_count = mock_model_setup(cuda_available=True) + + assert TTSModel.get_device() == "cuda" + assert voice_count == 3 # voice1.pt, voice2.pt, af.pt + + +def test_model_initialization_cpu(): + """Test model initialization with CPU""" + # Simulate no CUDA availability + voice_count = mock_model_setup(cuda_available=False) + + assert TTSModel.get_device() == "cpu" + assert voice_count == 3 # voice1.pt, voice2.pt, af.pt + + +def test_generate_audio_empty_text(tts_service): + """Test generating audio with empty text""" with pytest.raises(ValueError, match="Text is empty after preprocessing"): tts_service._generate_audio("", "af", 1.0) -@patch("api.src.services.tts.TTSModel.get_instance") +@patch("api.src.services.tts_model.TTSModel.get_instance") +@patch("api.src.services.tts_model.TTSModel.get_device") @patch("os.path.exists") -@patch("api.src.services.tts.normalize_text") -@patch("api.src.services.tts.phonemize") -@patch("api.src.services.tts.tokenize") -@patch("api.src.services.tts.generate") +@patch("kokoro.normalize_text") +@patch("kokoro.phonemize") +@patch("kokoro.tokenize") +@patch("kokoro.generate") @patch("torch.load") -def test_generate_audio_no_chunks( +def test_generate_audio_phonemize_error( mock_torch_load, mock_generate, mock_tokenize, mock_phonemize, mock_normalize, mock_exists, + mock_get_device, mock_instance, tts_service, ): - """Test generating audio with no successful chunks""" + """Test handling phonemization error""" mock_normalize.return_value = "Test text" - mock_phonemize.return_value = "Test text" - mock_tokenize.return_value = ["test", "text"] - mock_generate.return_value = (None, None) - mock_instance.return_value = (MagicMock(), "cpu") + mock_phonemize.side_effect = Exception("Phonemization failed") + mock_instance.return_value = (mock_generate, "cpu") # Use the same mock for consistency + mock_get_device.return_value = "cpu" mock_exists.return_value = True - mock_torch_load.return_value = MagicMock() + mock_torch_load.return_value = torch.zeros((10, 24000)) + mock_generate.return_value = (None, None) with pytest.raises(ValueError, match="No audio chunks were generated successfully"): tts_service._generate_audio("Test text", "af", 1.0) -@patch("torch.load") -@patch("torch.save") -@patch("torch.stack") -@patch("torch.mean") +@patch("api.src.services.tts_model.TTSModel.get_instance") +@patch("api.src.services.tts_model.TTSModel.get_device") @patch("os.path.exists") -def test_combine_voices( - mock_exists, mock_mean, mock_stack, mock_save, mock_load, tts_service +@patch("kokoro.normalize_text") +@patch("kokoro.phonemize") +@patch("kokoro.tokenize") +@patch("kokoro.generate") +@patch("torch.load") +def test_generate_audio_error( + mock_torch_load, + mock_generate, + mock_tokenize, + mock_phonemize, + mock_normalize, + mock_exists, + mock_get_device, + mock_instance, + tts_service, ): - """Test combining multiple voices""" - # Setup mocks + """Test handling generation error""" + mock_normalize.return_value = "Test text" + mock_phonemize.return_value = "Test text" + mock_tokenize.return_value = [1, 2] # Return integers instead of strings + mock_generate.side_effect = Exception("Generation failed") + mock_instance.return_value = (mock_generate, "cpu") # Use the same mock for consistency + mock_get_device.return_value = "cpu" mock_exists.return_value = True - mock_load.return_value = torch.tensor([1.0, 2.0]) - mock_stack.return_value = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) - mock_mean.return_value = torch.tensor([2.0, 3.0]) + mock_torch_load.return_value = torch.zeros((10, 24000)) - # Test combining two voices - result = tts_service.combine_voices(["voice1", "voice2"]) + with pytest.raises(ValueError, match="No audio chunks were generated successfully"): + tts_service._generate_audio("Test text", "af", 1.0) - assert result == "voice1_voice2" - mock_stack.assert_called_once() - mock_mean.assert_called_once() - mock_save.assert_called_once() + +def test_save_audio(tts_service, sample_audio, tmp_path): + """Test saving audio to file""" + output_path = os.path.join(tmp_path, "test_output.wav") + tts_service._save_audio(sample_audio, output_path) + assert os.path.exists(output_path) + assert os.path.getsize(output_path) > 0 + + +def test_combine_voices(tts_service): + """Test combining multiple voices""" + # Setup mocks for torch operations + with patch('torch.load', return_value=torch.tensor([1.0, 2.0])), \ + patch('torch.stack', return_value=torch.tensor([[1.0, 2.0], [3.0, 4.0]])), \ + patch('torch.mean', return_value=torch.tensor([2.0, 3.0])), \ + patch('torch.save'), \ + patch('os.path.exists', return_value=True): + + # Test combining two voices + result = tts_service.combine_voices(["voice1", "voice2"]) + + assert result == "voice1_voice2" def test_combine_voices_invalid_input(tts_service): @@ -155,221 +223,17 @@ def test_combine_voices_invalid_input(tts_service): tts_service.combine_voices(["voice1"]) -@patch("os.makedirs") -@patch("os.path.exists") -@patch("os.listdir") -@patch("torch.load") -@patch("torch.save") -@patch("os.path.join") -def test_ensure_voices( - mock_join, - mock_save, - mock_load, - mock_listdir, - mock_exists, - mock_makedirs, - tts_service, -): - """Test voice directory initialization""" - # Setup mocks - mock_exists.side_effect = [ - True, - False, - False, - ] # base_dir exists, voice files don't exist - mock_listdir.return_value = ["voice1.pt", "voice2.pt"] - mock_load.return_value = MagicMock() - mock_join.return_value = "/fake/path" - - # Test voice directory initialization - tts_service._ensure_voices() - - # Verify directory was created - mock_makedirs.assert_called_once() - - # Verify voices were loaded and saved - assert mock_load.call_count == len(mock_listdir.return_value) - assert mock_save.call_count == len(mock_listdir.return_value) - - -@patch("api.src.services.tts.TTSModel.get_instance") -@patch("os.path.exists") -@patch("api.src.services.tts.normalize_text") -@patch("api.src.services.tts.phonemize") -@patch("api.src.services.tts.tokenize") -@patch("api.src.services.tts.generate") -@patch("torch.load") -def test_generate_audio_success( - mock_torch_load, - mock_generate, - mock_tokenize, - mock_phonemize, - mock_normalize, - mock_exists, - mock_instance, - tts_service, - sample_audio, -): - """Test successful audio generation""" - mock_normalize.return_value = "Test text" - mock_phonemize.return_value = "Test text" - mock_tokenize.return_value = ["test", "text"] - mock_generate.return_value = (sample_audio, None) - mock_instance.return_value = (MagicMock(), "cpu") - mock_exists.return_value = True - mock_torch_load.return_value = MagicMock() - - audio, processing_time = tts_service._generate_audio("Test text", "af", 1.0) - assert isinstance(audio, np.ndarray) - assert isinstance(processing_time, float) - assert len(audio) > 0 - - -@patch("api.src.services.tts.torch.cuda.is_available") -@patch("api.src.services.tts.build_model") -def test_model_initialization_cuda(mock_build_model, mock_cuda_available): - """Test model initialization with CUDA""" - mock_cuda_available.return_value = True - mock_model = MagicMock() - mock_build_model.return_value = mock_model - - TTSModel._instance = None # Reset singleton - model, voice_count = TTSModel.initialize() - - assert TTSModel._device == "cuda" # Check the class variable instead - assert model == mock_model - mock_build_model.assert_called_once() - - -@patch("api.src.services.tts.torch.cuda.is_available") -@patch("api.src.services.tts.build_model") -def test_model_initialization_cpu(mock_build_model, mock_cuda_available): - """Test model initialization with CPU""" - mock_cuda_available.return_value = False - mock_model = MagicMock() - mock_build_model.return_value = mock_model - - TTSModel._instance = None # Reset singleton - model, voice_count = TTSModel.initialize() - - assert TTSModel._device == "cpu" # Check the class variable instead - assert model == mock_model - mock_build_model.assert_called_once() - - -@patch("api.src.services.tts.TTSService._get_voice_path") -@patch("api.src.services.tts.TTSModel.get_instance") +@patch("api.src.services.tts_service.TTSService._get_voice_path") +@patch("api.src.services.tts_model.TTSModel.get_instance") def test_voicepack_loading_error(mock_get_instance, mock_get_voice_path): """Test voicepack loading error handling""" mock_get_voice_path.return_value = None - mock_get_instance.return_value = (MagicMock(), "cpu") + mock_instance = MagicMock() + mock_instance.generate.return_value = np.zeros(24000, dtype=np.float32) + mock_get_instance.return_value = (mock_instance, "cpu") TTSModel._voicepacks = {} # Reset voicepacks - service = TTSService(start_worker=False) + service = TTSService() with pytest.raises(ValueError, match="Voice not found: nonexistent_voice"): service._generate_audio("test", "nonexistent_voice", 1.0) - - -@patch("api.src.services.tts.TTSModel") -def test_save_audio(mock_tts_model, tts_service, sample_audio, tmp_path): - """Test saving audio to file""" - output_dir = os.path.join(tmp_path, "test_output") - os.makedirs(output_dir, exist_ok=True) - output_path = os.path.join(output_dir, "audio.wav") - - tts_service._save_audio(sample_audio, output_path) - - assert os.path.exists(output_path) - assert os.path.getsize(output_path) > 0 - - -@patch("api.src.services.tts.TTSModel.get_instance") -@patch("os.path.exists") -@patch("api.src.services.tts.normalize_text") -@patch("api.src.services.tts.generate") -@patch("torch.load") -def test_generate_audio_without_stitching( - mock_torch_load, - mock_generate, - mock_normalize, - mock_exists, - mock_instance, - tts_service, - sample_audio, -): - """Test generating audio without text stitching""" - mock_normalize.return_value = "Test text" - mock_generate.return_value = (sample_audio, None) - mock_instance.return_value = (MagicMock(), "cpu") - mock_exists.return_value = True - mock_torch_load.return_value = MagicMock() - - audio, processing_time = tts_service._generate_audio( - "Test text", "af", 1.0, stitch_long_output=False - ) - assert isinstance(audio, np.ndarray) - assert len(audio) > 0 - mock_generate.assert_called_once() - - -@patch("os.listdir") -def test_list_voices_error(mock_listdir, tts_service): - """Test error handling in list_voices""" - mock_listdir.side_effect = Exception("Failed to list directory") - - voices = tts_service.list_voices() - assert voices == [] - - -@patch("api.src.services.tts.TTSModel.get_instance") -@patch("os.path.exists") -@patch("api.src.services.tts.normalize_text") -@patch("api.src.services.tts.phonemize") -@patch("api.src.services.tts.tokenize") -@patch("api.src.services.tts.generate") -@patch("torch.load") -def test_generate_audio_phonemize_error( - mock_torch_load, - mock_generate, - mock_tokenize, - mock_phonemize, - mock_normalize, - mock_exists, - mock_instance, - tts_service, -): - """Test handling phonemization error""" - mock_normalize.return_value = "Test text" - mock_phonemize.side_effect = Exception("Phonemization failed") - mock_instance.return_value = (MagicMock(), "cpu") - mock_exists.return_value = True - mock_torch_load.return_value = MagicMock() - mock_generate.return_value = (None, None) - - with pytest.raises(ValueError, match="No audio chunks were generated successfully"): - tts_service._generate_audio("Test text", "af", 1.0) - - -@patch("api.src.services.tts.TTSModel.get_instance") -@patch("os.path.exists") -@patch("api.src.services.tts.normalize_text") -@patch("api.src.services.tts.generate") -@patch("torch.load") -def test_generate_audio_error( - mock_torch_load, - mock_generate, - mock_normalize, - mock_exists, - mock_instance, - tts_service, -): - """Test handling generation error""" - mock_normalize.return_value = "Test text" - mock_generate.side_effect = Exception("Generation failed") - mock_instance.return_value = (MagicMock(), "cpu") - mock_exists.return_value = True - mock_torch_load.return_value = MagicMock() - - with pytest.raises(ValueError, match="No audio chunks were generated successfully"): - tts_service._generate_audio("Test text", "af", 1.0) diff --git a/docker-compose.cpu.yml b/docker-compose.cpu.yml index faea2fe..f44f2d4 100644 --- a/docker-compose.cpu.yml +++ b/docker-compose.cpu.yml @@ -36,6 +36,13 @@ services: - "8880:8880" environment: - PYTHONPATH=/app:/app/Kokoro-82M + # ONNX Optimization Settings for vectorized operations + - ONNX_NUM_THREADS=8 # Maximize core usage for vectorized ops + - ONNX_INTER_OP_THREADS=4 # Higher inter-op for parallel matrix operations + - ONNX_EXECUTION_MODE=parallel + - ONNX_OPTIMIZATION_LEVEL=all + - ONNX_MEMORY_PATTERN=true + - ONNX_ARENA_EXTEND_STRATEGY=kNextPowerOfTwo depends_on: model-fetcher: condition: service_healthy diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/assorted_checks/__init__.py b/examples/assorted_checks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/assorted_checks/benchmarks/__init__.py b/examples/assorted_checks/benchmarks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py b/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py new file mode 100644 index 0000000..385abb0 --- /dev/null +++ b/examples/assorted_checks/benchmarks/benchmark_tts_rtf.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python3 +import os +import json +import time +import threading +import queue +import pandas as pd +import sys +from datetime import datetime + +from lib.shared_plotting import plot_system_metrics, plot_correlation +from lib.shared_utils import ( + get_system_metrics, save_json_results, write_benchmark_stats, + real_time_factor +) +from lib.shared_benchmark_utils import ( + get_text_for_tokens, make_tts_request, generate_token_sizes, enc +) + +class SystemMonitor: + def __init__(self, interval=1.0): + self.interval = interval + self.metrics_queue = queue.Queue() + self.stop_event = threading.Event() + self.metrics_timeline = [] + self.start_time = None + + def _monitor_loop(self): + """Background thread function to collect system metrics.""" + while not self.stop_event.is_set(): + metrics = get_system_metrics() + metrics["relative_time"] = time.time() - self.start_time + self.metrics_queue.put(metrics) + time.sleep(self.interval) + + def start(self): + """Start the monitoring thread.""" + self.start_time = time.time() + self.monitor_thread = threading.Thread(target=self._monitor_loop) + self.monitor_thread.daemon = True + self.monitor_thread.start() + + def stop(self): + """Stop the monitoring thread and collect final metrics.""" + self.stop_event.set() + if hasattr(self, 'monitor_thread'): + self.monitor_thread.join(timeout=2) + + # Collect all metrics from queue + while True: + try: + metrics = self.metrics_queue.get_nowait() + self.metrics_timeline.append(metrics) + except queue.Empty: + break + + return self.metrics_timeline + +def main(): + # Initialize system monitor + monitor = SystemMonitor(interval=1.0) # 1 second interval + # Set prefix for output files (e.g. "gpu", "cpu", "onnx", etc.) + prefix = "gpu" + # Generate token sizes + if 'gpu' in prefix: + token_sizes = generate_token_sizes( + max_tokens=5000, dense_step=150, + dense_max=1000, sparse_step=1000) + elif 'cpu' in prefix: + token_sizes = generate_token_sizes( + max_tokens=1000, dense_step=300, + dense_max=1000, sparse_step=0) + else: + token_sizes = generate_token_sizes(max_tokens=3000) + + # Set up paths relative to this file + script_dir = os.path.dirname(os.path.abspath(__file__)) + output_dir = os.path.join(script_dir, "output_audio") + output_data_dir = os.path.join(script_dir, "output_data") + output_plots_dir = os.path.join(script_dir, "output_plots") + + # Create output directories + os.makedirs(output_dir, exist_ok=True) + os.makedirs(output_data_dir, exist_ok=True) + os.makedirs(output_plots_dir, exist_ok=True) + + # Function to prefix filenames + def prefix_path(path: str, filename: str) -> str: + if prefix: + filename = f"{prefix}_{filename}" + return os.path.join(path, filename) + + with open(os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8") as f: + text = f.read() + + total_tokens = len(enc.encode(text)) + print(f"Total tokens in file: {total_tokens}") + + print(f"Testing sizes: {token_sizes}") + + results = [] + test_start_time = time.time() + + # Start system monitoring + monitor.start() + + for num_tokens in token_sizes: + chunk = get_text_for_tokens(text, num_tokens) + actual_tokens = len(enc.encode(chunk)) + + print(f"\nProcessing chunk with {actual_tokens} tokens:") + print(f"Text preview: {chunk[:100]}...") + + processing_time, audio_length = make_tts_request( + chunk, + output_dir=output_dir, + prefix=prefix + ) + if processing_time is None or audio_length is None: + print("Breaking loop due to error") + break + + # Calculate RTF using the correct formula + rtf = real_time_factor(processing_time, audio_length) + print(f"Real-Time Factor: {rtf:.5f}") + + results.append({ + "tokens": actual_tokens, + "processing_time": processing_time, + "output_length": audio_length, + "rtf": rtf, + "elapsed_time": round(time.time() - test_start_time, 2), + }) + + df = pd.DataFrame(results) + if df.empty: + print("No data to plot") + return + + df["tokens_per_second"] = df["tokens"] / df["processing_time"] + + # Write benchmark stats + stats = [ + { + "title": "Benchmark Statistics (with correct RTF)", + "stats": { + "Total tokens processed": df['tokens'].sum(), + "Total audio generated (s)": df['output_length'].sum(), + "Total test duration (s)": df['elapsed_time'].max(), + "Average processing rate (tokens/s)": df['tokens_per_second'].mean(), + "Average RTF": df['rtf'].mean(), + "Average Real Time Speed": 1/df['rtf'].mean() + } + }, + { + "title": "Per-chunk Stats", + "stats": { + "Average chunk size (tokens)": df['tokens'].mean(), + "Min chunk size (tokens)": df['tokens'].min(), + "Max chunk size (tokens)": df['tokens'].max(), + "Average processing time (s)": df['processing_time'].mean(), + "Average output length (s)": df['output_length'].mean() + } + }, + { + "title": "Performance Ranges", + "stats": { + "Processing rate range (tokens/s)": f"{df['tokens_per_second'].min():.2f} - {df['tokens_per_second'].max():.2f}", + "RTF range": f"{df['rtf'].min():.2f}x - {df['rtf'].max():.2f}x", + "Real Time Speed range": f"{1/df['rtf'].max():.2f}x - {1/df['rtf'].min():.2f}x" + } + } + ] + write_benchmark_stats(stats, prefix_path(output_data_dir, "benchmark_stats_rtf.txt")) + + # Plot Processing Time vs Token Count + plot_correlation( + df, "tokens", "processing_time", + "Processing Time vs Input Size", + "Number of Input Tokens", + "Processing Time (seconds)", + prefix_path(output_plots_dir, "processing_time_rtf.png") + ) + + # Plot RTF vs Token Count + plot_correlation( + df, "tokens", "rtf", + "Real-Time Factor vs Input Size", + "Number of Input Tokens", + "Real-Time Factor (processing time / audio length)", + prefix_path(output_plots_dir, "realtime_factor_rtf.png") + ) + + # Stop monitoring and get final metrics + final_metrics = monitor.stop() + + # Convert metrics timeline to DataFrame for stats + metrics_df = pd.DataFrame(final_metrics) + + # Add system usage stats + if not metrics_df.empty: + stats.append({ + "title": "System Usage Statistics", + "stats": { + "Peak CPU Usage (%)": metrics_df['cpu_percent'].max(), + "Avg CPU Usage (%)": metrics_df['cpu_percent'].mean(), + "Peak RAM Usage (%)": metrics_df['ram_percent'].max(), + "Avg RAM Usage (%)": metrics_df['ram_percent'].mean(), + "Peak RAM Used (GB)": metrics_df['ram_used_gb'].max(), + "Avg RAM Used (GB)": metrics_df['ram_used_gb'].mean(), + } + }) + if 'gpu_memory_used' in metrics_df: + stats[-1]["stats"].update({ + "Peak GPU Memory (MB)": metrics_df['gpu_memory_used'].max(), + "Avg GPU Memory (MB)": metrics_df['gpu_memory_used'].mean(), + }) + + # Plot system metrics + plot_system_metrics(final_metrics, prefix_path(output_plots_dir, "system_usage_rtf.png")) + + # Save final results + save_json_results( + { + "results": results, + "system_metrics": final_metrics, + "test_duration": time.time() - test_start_time + }, + prefix_path(output_data_dir, "benchmark_results_rtf.json") + ) + + print("\nResults saved to:") + print(f"- {prefix_path(output_data_dir, 'benchmark_results_rtf.json')}") + print(f"- {prefix_path(output_data_dir, 'benchmark_stats_rtf.txt')}") + print(f"- {prefix_path(output_plots_dir, 'processing_time_rtf.png')}") + print(f"- {prefix_path(output_plots_dir, 'realtime_factor_rtf.png')}") + print(f"- {prefix_path(output_plots_dir, 'system_usage_rtf.png')}") + print(f"\nAudio files saved in {output_dir} with prefix: {prefix or '(none)'}") + + +if __name__ == "__main__": + main() diff --git a/examples/assorted_checks/benchmarks/depr_benchmark_tts.py b/examples/assorted_checks/benchmarks/depr_benchmark_tts.py new file mode 100644 index 0000000..989c177 --- /dev/null +++ b/examples/assorted_checks/benchmarks/depr_benchmark_tts.py @@ -0,0 +1,165 @@ +import os +import json +import time +import pandas as pd +from examples.assorted_checks.lib.shared_plotting import plot_system_metrics, plot_correlation +from examples.assorted_checks.lib.shared_utils import ( + get_system_metrics, save_json_results, write_benchmark_stats +) +from examples.assorted_checks.lib.shared_benchmark_utils import ( + get_text_for_tokens, make_tts_request, generate_token_sizes, enc +) + + +def main(): + # Get optional prefix from first command line argument + import sys + prefix = sys.argv[1] if len(sys.argv) > 1 else "" + + # Set up paths relative to this file + script_dir = os.path.dirname(os.path.abspath(__file__)) + output_dir = os.path.join(script_dir, "output_audio") + output_data_dir = os.path.join(script_dir, "output_data") + output_plots_dir = os.path.join(script_dir, "output_plots") + + # Create output directories + os.makedirs(output_dir, exist_ok=True) + os.makedirs(output_data_dir, exist_ok=True) + os.makedirs(output_plots_dir, exist_ok=True) + + # Function to prefix filenames + def prefix_path(path: str, filename: str) -> str: + if prefix: + filename = f"{prefix}_{filename}" + return os.path.join(path, filename) + + # Read input text + with open( + os.path.join(script_dir, "the_time_machine_hg_wells.txt"), "r", encoding="utf-8" + ) as f: + text = f.read() + + # Get total tokens in file + total_tokens = len(enc.encode(text)) + print(f"Total tokens in file: {total_tokens}") + + + token_sizes = generate_token_sizes(total_tokens) + + print(f"Testing sizes: {token_sizes}") + + # Process chunks + results = [] + system_metrics = [] + test_start_time = time.time() + + for num_tokens in token_sizes: + # Get text slice with exact token count + chunk = get_text_for_tokens(text, num_tokens) + actual_tokens = len(enc.encode(chunk)) + + print(f"\nProcessing chunk with {actual_tokens} tokens:") + print(f"Text preview: {chunk[:100]}...") + + # Collect system metrics before processing + system_metrics.append(get_system_metrics()) + + processing_time, audio_length = make_tts_request(chunk) + if processing_time is None or audio_length is None: + print("Breaking loop due to error") + break + + # Collect system metrics after processing + system_metrics.append(get_system_metrics()) + + results.append( + { + "tokens": actual_tokens, + "processing_time": processing_time, + "output_length": audio_length, + "realtime_factor": audio_length / processing_time, + "elapsed_time": time.time() - test_start_time, + } + ) + + # Save intermediate results + save_json_results( + {"results": results, "system_metrics": system_metrics}, + prefix_path(output_data_dir, "benchmark_results.json") + ) + + # Create DataFrame and calculate stats + df = pd.DataFrame(results) + if df.empty: + print("No data to plot") + return + + # Calculate useful metrics + df["tokens_per_second"] = df["tokens"] / df["processing_time"] + + # Write benchmark stats + stats = [ + { + "title": "Benchmark Statistics", + "stats": { + "Total tokens processed": df['tokens'].sum(), + "Total audio generated (s)": df['output_length'].sum(), + "Total test duration (s)": df['elapsed_time'].max(), + "Average processing rate (tokens/s)": df['tokens_per_second'].mean(), + "Average realtime factor": df['realtime_factor'].mean() + } + }, + { + "title": "Per-chunk Stats", + "stats": { + "Average chunk size (tokens)": df['tokens'].mean(), + "Min chunk size (tokens)": df['tokens'].min(), + "Max chunk size (tokens)": df['tokens'].max(), + "Average processing time (s)": df['processing_time'].mean(), + "Average output length (s)": df['output_length'].mean() + } + }, + { + "title": "Performance Ranges", + "stats": { + "Processing rate range (tokens/s)": f"{df['tokens_per_second'].min():.2f} - {df['tokens_per_second'].max():.2f}", + "Realtime factor range": f"{df['realtime_factor'].min():.2f}x - {df['realtime_factor'].max():.2f}x" + } + } + ] + write_benchmark_stats(stats, prefix_path(output_data_dir, "benchmark_stats.txt")) + + # Plot Processing Time vs Token Count + plot_correlation( + df, "tokens", "processing_time", + "Processing Time vs Input Size", + "Number of Input Tokens", + "Processing Time (seconds)", + prefix_path(output_plots_dir, "processing_time.png") + ) + + # Plot Realtime Factor vs Token Count + plot_correlation( + df, "tokens", "realtime_factor", + "Realtime Factor vs Input Size", + "Number of Input Tokens", + "Realtime Factor (output length / processing time)", + prefix_path(output_plots_dir, "realtime_factor.png") + ) + + # Plot system metrics + plot_system_metrics(system_metrics, prefix_path(output_plots_dir, "system_usage.png")) + + print("\nResults saved to:") + print(f"- {prefix_path(output_data_dir, 'benchmark_results.json')}") + print(f"- {prefix_path(output_data_dir, 'benchmark_stats.txt')}") + print(f"- {prefix_path(output_plots_dir, 'processing_time.png')}") + print(f"- {prefix_path(output_plots_dir, 'realtime_factor.png')}") + print(f"- {prefix_path(output_plots_dir, 'system_usage.png')}") + if any("gpu_memory_used" in m for m in system_metrics): + print(f"- {prefix_path(output_plots_dir, 'gpu_usage.png')}") + print(f"\nAudio files saved in {output_dir} with prefix: {prefix or '(none)'}") + + +if __name__ == "__main__": + main() diff --git a/examples/assorted_checks/benchmarks/lib/__init__.py b/examples/assorted_checks/benchmarks/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py b/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py new file mode 100644 index 0000000..c2fd1c4 --- /dev/null +++ b/examples/assorted_checks/benchmarks/lib/shared_benchmark_utils.py @@ -0,0 +1,111 @@ +"""Shared utilities specific to TTS benchmarking.""" +import time +from typing import List, Optional, Tuple + +import requests +import tiktoken + +from .shared_utils import get_audio_length, save_audio_file + +# Global tokenizer instance +enc = tiktoken.get_encoding("cl100k_base") + + +def get_text_for_tokens(text: str, num_tokens: int) -> str: + """Get a slice of text that contains exactly num_tokens tokens. + + Args: + text: Input text to slice + num_tokens: Desired number of tokens + + Returns: + str: Text slice containing exactly num_tokens tokens + """ + tokens = enc.encode(text) + if num_tokens > len(tokens): + return text + return enc.decode(tokens[:num_tokens]) + + +def make_tts_request( + text: str, + output_dir: str = None, + timeout: int = 1800, + prefix: str = "" +) -> Tuple[Optional[float], Optional[float]]: + """Make TTS request using OpenAI-compatible endpoint. + + Args: + text: Input text to convert to speech + output_dir: Directory to save audio files. If None, audio won't be saved. + timeout: Request timeout in seconds + prefix: Optional prefix for output filenames + + Returns: + tuple: (processing_time, audio_length) in seconds, or (None, None) on error + """ + try: + start_time = time.time() + response = requests.post( + "http://localhost:8880/v1/audio/speech", + json={ + "model": "kokoro", + "input": text, + "voice": "af", + "response_format": "wav", + }, + timeout=timeout, + ) + response.raise_for_status() + + processing_time = round(time.time() - start_time, 2) + # Calculate audio length from response content + audio_length = get_audio_length(response.content) + + # Save the audio file if output_dir is provided + if output_dir: + token_count = len(enc.encode(text)) + output_file = save_audio_file( + response.content, + f"chunk_{token_count}_tokens", + output_dir + ) + print(f"Saved audio to {output_file}") + + return processing_time, audio_length + + except requests.exceptions.RequestException as e: + print(f"Error making request for text: {text[:50]}... Error: {str(e)}") + return None, None + except Exception as e: + print(f"Error processing text: {text[:50]}... Error: {str(e)}") + return None, None + + +def generate_token_sizes( + max_tokens: int, + dense_step: int = 100, + dense_max: int = 1000, + sparse_step: int = 1000 +) -> List[int]: + """Generate token size ranges with dense sampling at start. + + Args: + max_tokens: Maximum number of tokens to generate sizes up to + dense_step: Step size for dense sampling range + dense_max: Maximum value for dense sampling + sparse_step: Step size for sparse sampling range + + Returns: + list: Sorted list of token sizes + """ + # Dense sampling at start + dense_range = list(range(dense_step, dense_max + 1, dense_step)) + + if max_tokens <= dense_max or sparse_step < dense_max: + return sorted(dense_range) + # Sparse sampling for larger sizes + sparse_range = list(range(dense_max + sparse_step, max_tokens + 1, sparse_step)) + + # Combine and deduplicate + return sorted(list(set(dense_range + sparse_range))) diff --git a/examples/assorted_checks/benchmarks/lib/shared_plotting.py b/examples/assorted_checks/benchmarks/lib/shared_plotting.py new file mode 100644 index 0000000..b1679df --- /dev/null +++ b/examples/assorted_checks/benchmarks/lib/shared_plotting.py @@ -0,0 +1,176 @@ +"""Shared plotting utilities for benchmarks and tests.""" +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +# Common style configurations +STYLE_CONFIG = { + "background_color": "#1a1a2e", + "primary_color": "#ff2a6d", + "secondary_color": "#05d9e8", + "grid_color": "#ffffff", + "text_color": "#ffffff", + "font_sizes": { + "title": 16, + "label": 14, + "tick": 12, + "text": 10 + } +} + +def setup_plot(fig, ax, title, xlabel=None, ylabel=None): + """Configure plot styling with consistent theme. + + Args: + fig: matplotlib figure object + ax: matplotlib axis object + title: str, plot title + xlabel: str, optional x-axis label + ylabel: str, optional y-axis label + + Returns: + tuple: (fig, ax) with applied styling + """ + # Grid styling + ax.grid(True, linestyle="--", alpha=0.3, color=STYLE_CONFIG["grid_color"]) + + # Title and labels + ax.set_title(title, pad=20, + fontsize=STYLE_CONFIG["font_sizes"]["title"], + fontweight="bold", + color=STYLE_CONFIG["text_color"]) + + if xlabel: + ax.set_xlabel(xlabel, + fontsize=STYLE_CONFIG["font_sizes"]["label"], + fontweight="medium", + color=STYLE_CONFIG["text_color"]) + if ylabel: + ax.set_ylabel(ylabel, + fontsize=STYLE_CONFIG["font_sizes"]["label"], + fontweight="medium", + color=STYLE_CONFIG["text_color"]) + + # Tick styling + ax.tick_params(labelsize=STYLE_CONFIG["font_sizes"]["tick"], + colors=STYLE_CONFIG["text_color"]) + + # Spine styling + for spine in ax.spines.values(): + spine.set_color(STYLE_CONFIG["text_color"]) + spine.set_alpha(0.3) + spine.set_linewidth(0.5) + + # Background colors + ax.set_facecolor(STYLE_CONFIG["background_color"]) + fig.patch.set_facecolor(STYLE_CONFIG["background_color"]) + + return fig, ax + +def plot_system_metrics(metrics_data, output_path): + """Create plots for system metrics over time. + + Args: + metrics_data: list of dicts containing system metrics + output_path: str, path to save the output plot + """ + df = pd.DataFrame(metrics_data) + df["timestamp"] = pd.to_datetime(df["timestamp"]) + elapsed_time = (df["timestamp"] - df["timestamp"].iloc[0]).dt.total_seconds() + + # Get baseline values + baseline_cpu = df["cpu_percent"].iloc[0] + baseline_ram = df["ram_used_gb"].iloc[0] + baseline_gpu = df["gpu_memory_used"].iloc[0] / 1024 if "gpu_memory_used" in df.columns else None + + # Convert GPU memory to GB if present + if "gpu_memory_used" in df.columns: + df["gpu_memory_gb"] = df["gpu_memory_used"] / 1024 + + plt.style.use("dark_background") + + # Create subplots based on available metrics + has_gpu = "gpu_memory_used" in df.columns + num_plots = 3 if has_gpu else 2 + fig, axes = plt.subplots(num_plots, 1, figsize=(15, 5 * num_plots)) + fig.patch.set_facecolor(STYLE_CONFIG["background_color"]) + + # Smoothing window + window = min(5, len(df) // 2) + + # Plot CPU Usage + smoothed_cpu = df["cpu_percent"].rolling(window=window, center=True).mean() + sns.lineplot(x=elapsed_time, y=smoothed_cpu, ax=axes[0], + color=STYLE_CONFIG["primary_color"], linewidth=2) + axes[0].axhline(y=baseline_cpu, color=STYLE_CONFIG["secondary_color"], + linestyle="--", alpha=0.5, label="Baseline") + setup_plot(fig, axes[0], "CPU Usage Over Time", + xlabel="Time (seconds)", ylabel="CPU Usage (%)") + axes[0].set_ylim(0, max(df["cpu_percent"]) * 1.1) + axes[0].legend() + + # Plot RAM Usage + smoothed_ram = df["ram_used_gb"].rolling(window=window, center=True).mean() + sns.lineplot(x=elapsed_time, y=smoothed_ram, ax=axes[1], + color=STYLE_CONFIG["secondary_color"], linewidth=2) + axes[1].axhline(y=baseline_ram, color=STYLE_CONFIG["primary_color"], + linestyle="--", alpha=0.5, label="Baseline") + setup_plot(fig, axes[1], "RAM Usage Over Time", + xlabel="Time (seconds)", ylabel="RAM Usage (GB)") + axes[1].set_ylim(0, max(df["ram_used_gb"]) * 1.1) + axes[1].legend() + + # Plot GPU Memory if available + if has_gpu: + smoothed_gpu = df["gpu_memory_gb"].rolling(window=window, center=True).mean() + sns.lineplot(x=elapsed_time, y=smoothed_gpu, ax=axes[2], + color=STYLE_CONFIG["primary_color"], linewidth=2) + axes[2].axhline(y=baseline_gpu, color=STYLE_CONFIG["secondary_color"], + linestyle="--", alpha=0.5, label="Baseline") + setup_plot(fig, axes[2], "GPU Memory Usage Over Time", + xlabel="Time (seconds)", ylabel="GPU Memory (GB)") + axes[2].set_ylim(0, max(df["gpu_memory_gb"]) * 1.1) + axes[2].legend() + + plt.tight_layout() + plt.savefig(output_path, dpi=300, bbox_inches="tight") + plt.close() + +def plot_correlation(df, x, y, title, xlabel, ylabel, output_path): + """Create correlation plot with regression line and correlation coefficient. + + Args: + df: pandas DataFrame containing the data + x: str, column name for x-axis + y: str, column name for y-axis + title: str, plot title + xlabel: str, x-axis label + ylabel: str, y-axis label + output_path: str, path to save the output plot + """ + plt.style.use("dark_background") + + fig, ax = plt.subplots(figsize=(12, 8)) + + # Scatter plot + sns.scatterplot(data=df, x=x, y=y, s=100, alpha=0.6, + color=STYLE_CONFIG["primary_color"]) + + # Regression line + sns.regplot(data=df, x=x, y=y, scatter=False, + color=STYLE_CONFIG["secondary_color"], + line_kws={"linewidth": 2}) + + # Add correlation coefficient + corr = df[x].corr(df[y]) + plt.text(0.05, 0.95, f"Correlation: {corr:.2f}", + transform=ax.transAxes, + fontsize=STYLE_CONFIG["font_sizes"]["text"], + color=STYLE_CONFIG["text_color"], + bbox=dict(facecolor=STYLE_CONFIG["background_color"], + edgecolor=STYLE_CONFIG["text_color"], + alpha=0.7)) + + setup_plot(fig, ax, title, xlabel=xlabel, ylabel=ylabel) + plt.savefig(output_path, dpi=300, bbox_inches="tight") + plt.close() diff --git a/examples/assorted_checks/benchmarks/lib/shared_utils.py b/examples/assorted_checks/benchmarks/lib/shared_utils.py new file mode 100644 index 0000000..a9c872e --- /dev/null +++ b/examples/assorted_checks/benchmarks/lib/shared_utils.py @@ -0,0 +1,174 @@ +"""Shared utilities for benchmarks and tests.""" +import os +import json +import subprocess +from datetime import datetime +from typing import Any, Dict, List, Optional, Union + +import psutil +import scipy.io.wavfile as wavfile + +# Check for torch availability once at module level +TORCH_AVAILABLE = False +try: + import torch + TORCH_AVAILABLE = torch.cuda.is_available() +except ImportError: + pass + + +def get_audio_length(audio_data: bytes, temp_dir: str = None) -> float: + """Get audio length in seconds from bytes data. + + Args: + audio_data: Raw audio bytes + temp_dir: Directory for temporary file. If None, uses system temp directory. + + Returns: + float: Audio length in seconds + """ + if temp_dir is None: + import tempfile + temp_dir = tempfile.gettempdir() + + temp_path = os.path.join(temp_dir, "temp.wav") + os.makedirs(temp_dir, exist_ok=True) + + with open(temp_path, "wb") as f: + f.write(audio_data) + + try: + rate, data = wavfile.read(temp_path) + return len(data) / rate + finally: + if os.path.exists(temp_path): + os.remove(temp_path) + + +def get_gpu_memory(average: bool = True) -> Optional[Union[float, List[float]]]: + """Get GPU memory usage using PyTorch if available, falling back to nvidia-smi. + + Args: + average: If True and multiple GPUs present, returns average memory usage. + If False, returns list of memory usage per GPU. + + Returns: + float or List[float] or None: GPU memory usage in MB. Returns None if no GPU available. + If average=False and multiple GPUs present, returns list of values. + """ + if TORCH_AVAILABLE: + n_gpus = torch.cuda.device_count() + memory_used = [] + for i in range(n_gpus): + memory_used.append(torch.cuda.memory_allocated(i) / 1024**2) # Convert to MB + + if average and len(memory_used) > 0: + return sum(memory_used) / len(memory_used) + return memory_used if len(memory_used) > 1 else memory_used[0] + + # Fall back to nvidia-smi + try: + result = subprocess.check_output( + ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,nounits,noheader"] + ) + memory_values = [float(x.strip()) for x in result.decode("utf-8").split("\n") if x.strip()] + + if average and len(memory_values) > 0: + return sum(memory_values) / len(memory_values) + return memory_values if len(memory_values) > 1 else memory_values[0] + except (subprocess.CalledProcessError, FileNotFoundError): + return None + + +def get_system_metrics() -> Dict[str, Union[str, float]]: + """Get current system metrics including CPU, RAM, and GPU if available. + + Returns: + dict: System metrics including timestamp, CPU%, RAM%, RAM GB, and GPU MB if available + """ + # Get per-CPU percentages and calculate average + cpu_percentages = psutil.cpu_percent(percpu=True) + avg_cpu = sum(cpu_percentages) / len(cpu_percentages) + + metrics = { + "timestamp": datetime.now().isoformat(), + "cpu_percent": round(avg_cpu, 2), + "ram_percent": psutil.virtual_memory().percent, + "ram_used_gb": psutil.virtual_memory().used / (1024**3), + } + + gpu_mem = get_gpu_memory(average=True) # Use average for system metrics + if gpu_mem is not None: + metrics["gpu_memory_used"] = round(gpu_mem, 2) + + return metrics + + +def save_audio_file(audio_data: bytes, identifier: str, output_dir: str) -> str: + """Save audio data to a file with proper naming and directory creation. + + Args: + audio_data: Raw audio bytes + identifier: String to identify this audio file (e.g. token count, test name) + output_dir: Directory to save the file + + Returns: + str: Path to the saved audio file + """ + os.makedirs(output_dir, exist_ok=True) + output_file = os.path.join(output_dir, f"{identifier}.wav") + + with open(output_file, "wb") as f: + f.write(audio_data) + + return output_file + + +def write_benchmark_stats(stats: List[Dict[str, Any]], output_file: str) -> None: + """Write benchmark statistics to a file in a clean, organized format. + + Args: + stats: List of dictionaries containing stat name/value pairs + output_file: Path to output file + """ + os.makedirs(os.path.dirname(output_file), exist_ok=True) + + with open(output_file, "w") as f: + for section in stats: + # Write section header + f.write(f"=== {section['title']} ===\n\n") + + # Write stats + for label, value in section['stats'].items(): + if isinstance(value, float): + f.write(f"{label}: {value:.2f}\n") + else: + f.write(f"{label}: {value}\n") + f.write("\n") + + +def save_json_results(results: Dict[str, Any], output_file: str) -> None: + """Save benchmark results to a JSON file with proper formatting. + + Args: + results: Dictionary of results to save + output_file: Path to output file + """ + os.makedirs(os.path.dirname(output_file), exist_ok=True) + with open(output_file, "w") as f: + json.dump(results, f, indent=2) + + +def real_time_factor(processing_time: float, audio_length: float, decimals: int = 2) -> float: + """Calculate Real-Time Factor (RTF) as processing-time / length-of-audio. + + Args: + processing_time: Time taken to process/generate audio + audio_length: Length of the generated audio + decimals: Number of decimal places to round to + + Returns: + float: RTF value + """ + rtf = processing_time / audio_length + return round(rtf, decimals) diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results.json new file mode 100644 index 0000000..5c60933 --- /dev/null +++ b/examples/assorted_checks/benchmarks/output_data/benchmark_results.json @@ -0,0 +1,111 @@ +{ + "results": [ + { + "tokens": 100, + "processing_time": 18.833295583724976, + "output_length": 31.15, + "realtime_factor": 1.6539856161403135, + "elapsed_time": 19.024322748184204 + }, + { + "tokens": 200, + "processing_time": 38.95506024360657, + "output_length": 62.6, + "realtime_factor": 1.6069799304257042, + "elapsed_time": 58.21527123451233 + }, + { + "tokens": 300, + "processing_time": 49.74252939224243, + "output_length": 96.325, + "realtime_factor": 1.9364716908630366, + "elapsed_time": 108.19673728942871 + }, + { + "tokens": 400, + "processing_time": 61.349056243896484, + "output_length": 128.575, + "realtime_factor": 2.095794261102292, + "elapsed_time": 169.733656167984 + }, + { + "tokens": 500, + "processing_time": 82.86568236351013, + "output_length": 158.575, + "realtime_factor": 1.9136389815071193, + "elapsed_time": 252.7968451976776 + } + ], + "system_metrics": [ + { + "timestamp": "2025-01-03T00:13:49.865330", + "cpu_percent": 8.0, + "ram_percent": 39.4, + "ram_used_gb": 25.03811264038086, + "gpu_memory_used": 1204.0 + }, + { + "timestamp": "2025-01-03T00:14:08.781551", + "cpu_percent": 26.8, + "ram_percent": 42.6, + "ram_used_gb": 27.090862274169922, + "gpu_memory_used": 1225.0 + }, + { + "timestamp": "2025-01-03T00:14:08.916973", + "cpu_percent": 16.1, + "ram_percent": 42.6, + "ram_used_gb": 27.089553833007812, + "gpu_memory_used": 1225.0 + }, + { + "timestamp": "2025-01-03T00:14:47.979053", + "cpu_percent": 31.5, + "ram_percent": 43.6, + "ram_used_gb": 27.714427947998047, + "gpu_memory_used": 1225.0 + }, + { + "timestamp": "2025-01-03T00:14:48.098976", + "cpu_percent": 20.0, + "ram_percent": 43.6, + "ram_used_gb": 27.704315185546875, + "gpu_memory_used": 1211.0 + }, + { + "timestamp": "2025-01-03T00:15:37.944729", + "cpu_percent": 29.7, + "ram_percent": 38.6, + "ram_used_gb": 24.53925323486328, + "gpu_memory_used": 1217.0 + }, + { + "timestamp": "2025-01-03T00:15:38.071915", + "cpu_percent": 8.6, + "ram_percent": 38.5, + "ram_used_gb": 24.51690673828125, + "gpu_memory_used": 1208.0 + }, + { + "timestamp": "2025-01-03T00:16:39.525449", + "cpu_percent": 23.4, + "ram_percent": 38.8, + "ram_used_gb": 24.71230697631836, + "gpu_memory_used": 1221.0 + }, + { + "timestamp": "2025-01-03T00:16:39.612442", + "cpu_percent": 5.5, + "ram_percent": 38.9, + "ram_used_gb": 24.72066879272461, + "gpu_memory_used": 1221.0 + }, + { + "timestamp": "2025-01-03T00:18:02.569076", + "cpu_percent": 27.4, + "ram_percent": 39.1, + "ram_used_gb": 24.868202209472656, + "gpu_memory_used": 1264.0 + } + ] +} \ No newline at end of file diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json new file mode 100644 index 0000000..52f8f04 --- /dev/null +++ b/examples/assorted_checks/benchmarks/output_data/benchmark_results_cpu.json @@ -0,0 +1,216 @@ +{ + "results": [ + { + "tokens": 100, + "processing_time": 14.349808931350708, + "output_length": 31.15, + "rtf": 0.46, + "elapsed_time": 14.716031074523926 + }, + { + "tokens": 200, + "processing_time": 28.341803312301636, + "output_length": 62.6, + "rtf": 0.45, + "elapsed_time": 43.44207406044006 + }, + { + "tokens": 300, + "processing_time": 43.352553606033325, + "output_length": 96.325, + "rtf": 0.45, + "elapsed_time": 87.26906609535217 + }, + { + "tokens": 400, + "processing_time": 71.02449822425842, + "output_length": 128.575, + "rtf": 0.55, + "elapsed_time": 158.7198133468628 + }, + { + "tokens": 500, + "processing_time": 70.92521691322327, + "output_length": 158.575, + "rtf": 0.45, + "elapsed_time": 230.01379895210266 + }, + { + "tokens": 600, + "processing_time": 83.6328592300415, + "output_length": 189.25, + "rtf": 0.44, + "elapsed_time": 314.02610969543457 + }, + { + "tokens": 700, + "processing_time": 103.0810194015503, + "output_length": 222.075, + "rtf": 0.46, + "elapsed_time": 417.5678551197052 + }, + { + "tokens": 800, + "processing_time": 127.02162909507751, + "output_length": 253.85, + "rtf": 0.5, + "elapsed_time": 545.0128681659698 + }, + { + "tokens": 900, + "processing_time": 130.49781227111816, + "output_length": 283.775, + "rtf": 0.46, + "elapsed_time": 675.8943417072296 + }, + { + "tokens": 1000, + "processing_time": 154.76425909996033, + "output_length": 315.475, + "rtf": 0.49, + "elapsed_time": 831.0677945613861 + } + ], + "system_metrics": [ + { + "timestamp": "2025-01-03T00:23:52.896889", + "cpu_percent": 4.5, + "ram_percent": 39.1, + "ram_used_gb": 24.86032485961914, + "gpu_memory_used": 1281.0 + }, + { + "timestamp": "2025-01-03T00:24:07.429461", + "cpu_percent": 4.5, + "ram_percent": 39.1, + "ram_used_gb": 24.847564697265625, + "gpu_memory_used": 1285.0 + }, + { + "timestamp": "2025-01-03T00:24:07.620587", + "cpu_percent": 2.7, + "ram_percent": 39.1, + "ram_used_gb": 24.846607208251953, + "gpu_memory_used": 1275.0 + }, + { + "timestamp": "2025-01-03T00:24:36.140754", + "cpu_percent": 5.4, + "ram_percent": 39.1, + "ram_used_gb": 24.857810974121094, + "gpu_memory_used": 1267.0 + }, + { + "timestamp": "2025-01-03T00:24:36.340675", + "cpu_percent": 6.2, + "ram_percent": 39.1, + "ram_used_gb": 24.85773468017578, + "gpu_memory_used": 1267.0 + }, + { + "timestamp": "2025-01-03T00:25:19.905634", + "cpu_percent": 29.1, + "ram_percent": 39.2, + "ram_used_gb": 24.920318603515625, + "gpu_memory_used": 1256.0 + }, + { + "timestamp": "2025-01-03T00:25:20.182219", + "cpu_percent": 20.0, + "ram_percent": 39.2, + "ram_used_gb": 24.930198669433594, + "gpu_memory_used": 1256.0 + }, + { + "timestamp": "2025-01-03T00:26:31.414760", + "cpu_percent": 5.3, + "ram_percent": 39.5, + "ram_used_gb": 25.127891540527344, + "gpu_memory_used": 1259.0 + }, + { + "timestamp": "2025-01-03T00:26:31.617256", + "cpu_percent": 3.6, + "ram_percent": 39.5, + "ram_used_gb": 25.126346588134766, + "gpu_memory_used": 1252.0 + }, + { + "timestamp": "2025-01-03T00:27:42.736097", + "cpu_percent": 10.5, + "ram_percent": 39.5, + "ram_used_gb": 25.100231170654297, + "gpu_memory_used": 1249.0 + }, + { + "timestamp": "2025-01-03T00:27:42.912870", + "cpu_percent": 5.3, + "ram_percent": 39.5, + "ram_used_gb": 25.098285675048828, + "gpu_memory_used": 1249.0 + }, + { + "timestamp": "2025-01-03T00:29:06.725264", + "cpu_percent": 8.9, + "ram_percent": 39.5, + "ram_used_gb": 25.123123168945312, + "gpu_memory_used": 1239.0 + }, + { + "timestamp": "2025-01-03T00:29:06.928826", + "cpu_percent": 5.5, + "ram_percent": 39.5, + "ram_used_gb": 25.128646850585938, + "gpu_memory_used": 1239.0 + }, + { + "timestamp": "2025-01-03T00:30:50.206349", + "cpu_percent": 49.6, + "ram_percent": 39.6, + "ram_used_gb": 25.162948608398438, + "gpu_memory_used": 1245.0 + }, + { + "timestamp": "2025-01-03T00:30:50.491837", + "cpu_percent": 14.8, + "ram_percent": 39.5, + "ram_used_gb": 25.13379669189453, + "gpu_memory_used": 1245.0 + }, + { + "timestamp": "2025-01-03T00:32:57.721467", + "cpu_percent": 6.2, + "ram_percent": 39.6, + "ram_used_gb": 25.187721252441406, + "gpu_memory_used": 1384.0 + }, + { + "timestamp": "2025-01-03T00:32:57.913350", + "cpu_percent": 3.6, + "ram_percent": 39.6, + "ram_used_gb": 25.199390411376953, + "gpu_memory_used": 1384.0 + }, + { + "timestamp": "2025-01-03T00:35:08.608730", + "cpu_percent": 6.3, + "ram_percent": 39.8, + "ram_used_gb": 25.311710357666016, + "gpu_memory_used": 1330.0 + }, + { + "timestamp": "2025-01-03T00:35:08.791851", + "cpu_percent": 5.3, + "ram_percent": 39.8, + "ram_used_gb": 25.326683044433594, + "gpu_memory_used": 1333.0 + }, + { + "timestamp": "2025-01-03T00:37:43.782406", + "cpu_percent": 6.8, + "ram_percent": 40.6, + "ram_used_gb": 25.803058624267578, + "gpu_memory_used": 1409.0 + } + ] +} \ No newline at end of file diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json new file mode 100644 index 0000000..59ad009 --- /dev/null +++ b/examples/assorted_checks/benchmarks/output_data/benchmark_results_rtf.json @@ -0,0 +1,300 @@ +{ + "results": [ + { + "tokens": 100, + "processing_time": 0.96, + "output_length": 31.1, + "rtf": 0.03, + "elapsed_time": 1.11 + }, + { + "tokens": 250, + "processing_time": 2.23, + "output_length": 77.17, + "rtf": 0.03, + "elapsed_time": 3.49 + }, + { + "tokens": 400, + "processing_time": 4.05, + "output_length": 128.05, + "rtf": 0.03, + "elapsed_time": 7.77 + }, + { + "tokens": 550, + "processing_time": 4.06, + "output_length": 171.45, + "rtf": 0.02, + "elapsed_time": 12.0 + }, + { + "tokens": 700, + "processing_time": 6.01, + "output_length": 221.6, + "rtf": 0.03, + "elapsed_time": 18.16 + }, + { + "tokens": 850, + "processing_time": 6.9, + "output_length": 269.1, + "rtf": 0.03, + "elapsed_time": 25.21 + }, + { + "tokens": 1000, + "processing_time": 7.65, + "output_length": 315.05, + "rtf": 0.02, + "elapsed_time": 33.03 + }, + { + "tokens": 6000, + "processing_time": 48.7, + "output_length": 1837.1, + "rtf": 0.03, + "elapsed_time": 82.21 + }, + { + "tokens": 11000, + "processing_time": 92.44, + "output_length": 3388.57, + "rtf": 0.03, + "elapsed_time": 175.46 + }, + { + "tokens": 16000, + "processing_time": 163.61, + "output_length": 4977.32, + "rtf": 0.03, + "elapsed_time": 340.46 + }, + { + "tokens": 21000, + "processing_time": 209.72, + "output_length": 6533.3, + "rtf": 0.03, + "elapsed_time": 551.92 + }, + { + "tokens": 26000, + "processing_time": 329.35, + "output_length": 8068.15, + "rtf": 0.04, + "elapsed_time": 883.37 + }, + { + "tokens": 31000, + "processing_time": 473.52, + "output_length": 9611.48, + "rtf": 0.05, + "elapsed_time": 1359.28 + }, + { + "tokens": 36000, + "processing_time": 650.98, + "output_length": 11157.15, + "rtf": 0.06, + "elapsed_time": 2012.9 + } + ], + "system_metrics": [ + { + "timestamp": "2025-01-03T14:41:01.331735", + "cpu_percent": 7.5, + "ram_percent": 50.2, + "ram_used_gb": 31.960269927978516, + "gpu_memory_used": 3191.0 + }, + { + "timestamp": "2025-01-03T14:41:02.357116", + "cpu_percent": 17.01, + "ram_percent": 50.2, + "ram_used_gb": 31.96163558959961, + "gpu_memory_used": 3426.0 + }, + { + "timestamp": "2025-01-03T14:41:02.445009", + "cpu_percent": 9.5, + "ram_percent": 50.3, + "ram_used_gb": 31.966781616210938, + "gpu_memory_used": 3426.0 + }, + { + "timestamp": "2025-01-03T14:41:04.742152", + "cpu_percent": 18.27, + "ram_percent": 50.4, + "ram_used_gb": 32.08788299560547, + "gpu_memory_used": 3642.0 + }, + { + "timestamp": "2025-01-03T14:41:04.847795", + "cpu_percent": 16.27, + "ram_percent": 50.5, + "ram_used_gb": 32.094364166259766, + "gpu_memory_used": 3640.0 + }, + { + "timestamp": "2025-01-03T14:41:09.019590", + "cpu_percent": 15.97, + "ram_percent": 50.7, + "ram_used_gb": 32.23244094848633, + "gpu_memory_used": 3640.0 + }, + { + "timestamp": "2025-01-03T14:41:09.110324", + "cpu_percent": 3.54, + "ram_percent": 50.7, + "ram_used_gb": 32.234458923339844, + "gpu_memory_used": 3640.0 + }, + { + "timestamp": "2025-01-03T14:41:13.252607", + "cpu_percent": 13.4, + "ram_percent": 50.6, + "ram_used_gb": 32.194271087646484, + "gpu_memory_used": 3935.0 + }, + { + "timestamp": "2025-01-03T14:41:13.327557", + "cpu_percent": 4.69, + "ram_percent": 50.6, + "ram_used_gb": 32.191776275634766, + "gpu_memory_used": 3935.0 + }, + { + "timestamp": "2025-01-03T14:41:19.413633", + "cpu_percent": 12.92, + "ram_percent": 50.9, + "ram_used_gb": 32.3467903137207, + "gpu_memory_used": 4250.0 + }, + { + "timestamp": "2025-01-03T14:41:19.492758", + "cpu_percent": 7.5, + "ram_percent": 50.8, + "ram_used_gb": 32.34375, + "gpu_memory_used": 4250.0 + }, + { + "timestamp": "2025-01-03T14:41:26.467284", + "cpu_percent": 13.09, + "ram_percent": 51.2, + "ram_used_gb": 32.56281280517578, + "gpu_memory_used": 4249.0 + }, + { + "timestamp": "2025-01-03T14:41:26.553559", + "cpu_percent": 8.39, + "ram_percent": 51.2, + "ram_used_gb": 32.56183624267578, + "gpu_memory_used": 4249.0 + }, + { + "timestamp": "2025-01-03T14:41:34.284362", + "cpu_percent": 12.61, + "ram_percent": 51.7, + "ram_used_gb": 32.874778747558594, + "gpu_memory_used": 4250.0 + }, + { + "timestamp": "2025-01-03T14:41:34.362353", + "cpu_percent": 1.25, + "ram_percent": 51.7, + "ram_used_gb": 32.87461471557617, + "gpu_memory_used": 4250.0 + }, + { + "timestamp": "2025-01-03T14:42:23.471312", + "cpu_percent": 11.64, + "ram_percent": 54.9, + "ram_used_gb": 34.90264129638672, + "gpu_memory_used": 4647.0 + }, + { + "timestamp": "2025-01-03T14:42:23.547203", + "cpu_percent": 5.31, + "ram_percent": 54.9, + "ram_used_gb": 34.91563415527344, + "gpu_memory_used": 4647.0 + }, + { + "timestamp": "2025-01-03T14:43:56.724933", + "cpu_percent": 12.97, + "ram_percent": 59.5, + "ram_used_gb": 37.84241485595703, + "gpu_memory_used": 4655.0 + }, + { + "timestamp": "2025-01-03T14:43:56.815453", + "cpu_percent": 11.75, + "ram_percent": 59.5, + "ram_used_gb": 37.832679748535156, + "gpu_memory_used": 4655.0 + }, + { + "timestamp": "2025-01-03T14:46:41.705155", + "cpu_percent": 12.94, + "ram_percent": 66.3, + "ram_used_gb": 42.1534538269043, + "gpu_memory_used": 4729.0 + }, + { + "timestamp": "2025-01-03T14:46:41.835177", + "cpu_percent": 7.73, + "ram_percent": 66.2, + "ram_used_gb": 42.13554000854492, + "gpu_memory_used": 4729.0 + }, + { + "timestamp": "2025-01-03T14:50:13.166236", + "cpu_percent": 11.62, + "ram_percent": 73.4, + "ram_used_gb": 46.71288299560547, + "gpu_memory_used": 4676.0 + }, + { + "timestamp": "2025-01-03T14:50:13.261611", + "cpu_percent": 8.16, + "ram_percent": 73.4, + "ram_used_gb": 46.71356201171875, + "gpu_memory_used": 4676.0 + }, + { + "timestamp": "2025-01-03T14:55:44.623607", + "cpu_percent": 12.92, + "ram_percent": 82.8, + "ram_used_gb": 52.65533447265625, + "gpu_memory_used": 4636.0 + }, + { + "timestamp": "2025-01-03T14:55:44.735410", + "cpu_percent": 15.29, + "ram_percent": 82.7, + "ram_used_gb": 52.63290786743164, + "gpu_memory_used": 4636.0 + }, + { + "timestamp": "2025-01-03T15:03:40.534449", + "cpu_percent": 13.88, + "ram_percent": 85.0, + "ram_used_gb": 54.050071716308594, + "gpu_memory_used": 4771.0 + }, + { + "timestamp": "2025-01-03T15:03:40.638708", + "cpu_percent": 12.21, + "ram_percent": 85.0, + "ram_used_gb": 54.053733825683594, + "gpu_memory_used": 4771.0 + }, + { + "timestamp": "2025-01-03T15:14:34.159142", + "cpu_percent": 14.51, + "ram_percent": 78.1, + "ram_used_gb": 49.70396423339844, + "gpu_memory_used": 4739.0 + } + ] +} \ No newline at end of file diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt new file mode 100644 index 0000000..010d116 --- /dev/null +++ b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_cpu.txt @@ -0,0 +1,19 @@ +=== Benchmark Statistics (with correct RTF) === + +Overall Stats: +Total tokens processed: 5500 +Total audio generated: 1741.65s +Total test duration: 831.07s +Average processing rate: 6.72 tokens/second +Average RTF: 0.47x + +Per-chunk Stats: +Average chunk size: 550.00 tokens +Min chunk size: 100.00 tokens +Max chunk size: 1000.00 tokens +Average processing time: 82.70s +Average output length: 174.17s + +Performance Ranges: +Processing rate range: 5.63 - 7.17 tokens/second +RTF range: 0.44x - 0.55x diff --git a/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt new file mode 100644 index 0000000..e7bed5f --- /dev/null +++ b/examples/assorted_checks/benchmarks/output_data/benchmark_stats_rtf.txt @@ -0,0 +1,9 @@ +=== Benchmark Statistics (with correct RTF) === + +Overall Stats: +Total tokens processed: 150850 +Total audio generated: 46786.59s +Total test duration: 2012.90s +Average processing rate: 104.34 tokens/second +Average RTF: 0.03x + diff --git a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json new file mode 100644 index 0000000..edcb334 --- /dev/null +++ b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_results_rtf.json @@ -0,0 +1,1804 @@ +{ + "results": [ + { + "tokens": 300, + "processing_time": 41.62, + "output_length": 96.425, + "rtf": 0.43, + "elapsed_time": 41.68 + }, + { + "tokens": 600, + "processing_time": 81.72, + "output_length": 188.675, + "rtf": 0.43, + "elapsed_time": 123.49 + }, + { + "tokens": 900, + "processing_time": 120.55, + "output_length": 283.425, + "rtf": 0.43, + "elapsed_time": 244.1 + } + ], + "system_metrics": [ + { + "timestamp": "2025-01-04T01:30:26.991154", + "cpu_percent": 7.83, + "ram_percent": 48.2, + "ram_used_gb": 30.669906616210938, + "gpu_memory_used": 1243.0, + "relative_time": 0.07800030708312988 + }, + { + "timestamp": "2025-01-04T01:30:28.079669", + "cpu_percent": 59.43, + "ram_percent": 48.2, + "ram_used_gb": 30.675106048583984, + "gpu_memory_used": 1244.0, + "relative_time": 1.1842052936553955 + }, + { + "timestamp": "2025-01-04T01:30:29.185881", + "cpu_percent": 57.14, + "ram_percent": 47.9, + "ram_used_gb": 30.473060607910156, + "gpu_memory_used": 1246.0, + "relative_time": 2.31345796585083 + }, + { + "timestamp": "2025-01-04T01:30:30.312825", + "cpu_percent": 49.54, + "ram_percent": 47.9, + "ram_used_gb": 30.49838638305664, + "gpu_memory_used": 1248.0, + "relative_time": 3.42720627784729 + }, + { + "timestamp": "2025-01-04T01:30:31.421201", + "cpu_percent": 47.16, + "ram_percent": 47.9, + "ram_used_gb": 30.44550323486328, + "gpu_memory_used": 1251.0, + "relative_time": 4.517812728881836 + }, + { + "timestamp": "2025-01-04T01:30:32.514913", + "cpu_percent": 47.98, + "ram_percent": 47.8, + "ram_used_gb": 30.41952896118164, + "gpu_memory_used": 1251.0, + "relative_time": 5.647390604019165 + }, + { + "timestamp": "2025-01-04T01:30:33.649021", + "cpu_percent": 48.55, + "ram_percent": 47.8, + "ram_used_gb": 30.400592803955078, + "gpu_memory_used": 1249.0, + "relative_time": 6.729969263076782 + }, + { + "timestamp": "2025-01-04T01:30:34.723785", + "cpu_percent": 43.88, + "ram_percent": 47.8, + "ram_used_gb": 30.390079498291016, + "gpu_memory_used": 1253.0, + "relative_time": 7.860571622848511 + }, + { + "timestamp": "2025-01-04T01:30:35.864707", + "cpu_percent": 50.01, + "ram_percent": 47.8, + "ram_used_gb": 30.380477905273438, + "gpu_memory_used": 1253.0, + "relative_time": 8.9869704246521 + }, + { + "timestamp": "2025-01-04T01:30:36.982950", + "cpu_percent": 49.29, + "ram_percent": 47.8, + "ram_used_gb": 30.41130828857422, + "gpu_memory_used": 1255.0, + "relative_time": 10.097310066223145 + }, + { + "timestamp": "2025-01-04T01:30:38.099505", + "cpu_percent": 52.99, + "ram_percent": 47.8, + "ram_used_gb": 30.410892486572266, + "gpu_memory_used": 1252.0, + "relative_time": 11.204046249389648 + }, + { + "timestamp": "2025-01-04T01:30:39.205066", + "cpu_percent": 42.98, + "ram_percent": 47.8, + "ram_used_gb": 30.40534210205078, + "gpu_memory_used": 1253.0, + "relative_time": 12.306914329528809 + }, + { + "timestamp": "2025-01-04T01:30:40.305591", + "cpu_percent": 47.11, + "ram_percent": 47.8, + "ram_used_gb": 30.40200424194336, + "gpu_memory_used": 1253.0, + "relative_time": 13.411193370819092 + }, + { + "timestamp": "2025-01-04T01:30:41.410928", + "cpu_percent": 50.09, + "ram_percent": 47.8, + "ram_used_gb": 30.39764404296875, + "gpu_memory_used": 1260.0, + "relative_time": 14.534100770950317 + }, + { + "timestamp": "2025-01-04T01:30:42.530654", + "cpu_percent": 57.82, + "ram_percent": 47.8, + "ram_used_gb": 30.39893341064453, + "gpu_memory_used": 1256.0, + "relative_time": 15.66111135482788 + }, + { + "timestamp": "2025-01-04T01:30:43.666031", + "cpu_percent": 52.61, + "ram_percent": 47.8, + "ram_used_gb": 30.37706756591797, + "gpu_memory_used": 1256.0, + "relative_time": 16.79327368736267 + }, + { + "timestamp": "2025-01-04T01:30:44.794904", + "cpu_percent": 57.14, + "ram_percent": 47.7, + "ram_used_gb": 30.36868667602539, + "gpu_memory_used": 1256.0, + "relative_time": 17.861677646636963 + }, + { + "timestamp": "2025-01-04T01:30:45.865891", + "cpu_percent": 66.7, + "ram_percent": 47.7, + "ram_used_gb": 30.371902465820312, + "gpu_memory_used": 1257.0, + "relative_time": 18.96451497077942 + }, + { + "timestamp": "2025-01-04T01:30:46.971206", + "cpu_percent": 53.61, + "ram_percent": 47.7, + "ram_used_gb": 30.352508544921875, + "gpu_memory_used": 1254.0, + "relative_time": 20.086195945739746 + }, + { + "timestamp": "2025-01-04T01:30:48.089632", + "cpu_percent": 50.26, + "ram_percent": 47.7, + "ram_used_gb": 30.349388122558594, + "gpu_memory_used": 1248.0, + "relative_time": 21.199003219604492 + }, + { + "timestamp": "2025-01-04T01:30:49.191842", + "cpu_percent": 48.22, + "ram_percent": 47.7, + "ram_used_gb": 30.344642639160156, + "gpu_memory_used": 1251.0, + "relative_time": 22.322958946228027 + }, + { + "timestamp": "2025-01-04T01:30:50.324994", + "cpu_percent": 55.64, + "ram_percent": 47.7, + "ram_used_gb": 30.35323715209961, + "gpu_memory_used": 1251.0, + "relative_time": 23.469967365264893 + }, + { + "timestamp": "2025-01-04T01:30:51.477231", + "cpu_percent": 45.68, + "ram_percent": 47.7, + "ram_used_gb": 30.35232162475586, + "gpu_memory_used": 1251.0, + "relative_time": 24.579415798187256 + }, + { + "timestamp": "2025-01-04T01:30:52.585934", + "cpu_percent": 46.07, + "ram_percent": 47.7, + "ram_used_gb": 30.32147216796875, + "gpu_memory_used": 1244.0, + "relative_time": 25.71301007270813 + }, + { + "timestamp": "2025-01-04T01:30:53.707821", + "cpu_percent": 47.54, + "ram_percent": 47.6, + "ram_used_gb": 30.296611785888672, + "gpu_memory_used": 1244.0, + "relative_time": 26.7750301361084 + }, + { + "timestamp": "2025-01-04T01:30:54.766880", + "cpu_percent": 44.8, + "ram_percent": 47.6, + "ram_used_gb": 30.28769302368164, + "gpu_memory_used": 1237.0, + "relative_time": 27.87526297569275 + }, + { + "timestamp": "2025-01-04T01:30:55.873403", + "cpu_percent": 48.82, + "ram_percent": 47.6, + "ram_used_gb": 30.285594940185547, + "gpu_memory_used": 1237.0, + "relative_time": 29.00292205810547 + }, + { + "timestamp": "2025-01-04T01:30:57.003386", + "cpu_percent": 55.54, + "ram_percent": 47.6, + "ram_used_gb": 30.30721664428711, + "gpu_memory_used": 1237.0, + "relative_time": 30.13248038291931 + }, + { + "timestamp": "2025-01-04T01:30:58.135723", + "cpu_percent": 46.97, + "ram_percent": 47.7, + "ram_used_gb": 30.319698333740234, + "gpu_memory_used": 1237.0, + "relative_time": 31.280652046203613 + }, + { + "timestamp": "2025-01-04T01:30:59.274397", + "cpu_percent": 46.94, + "ram_percent": 47.7, + "ram_used_gb": 30.31420135498047, + "gpu_memory_used": 1239.0, + "relative_time": 32.39983797073364 + }, + { + "timestamp": "2025-01-04T01:31:00.405545", + "cpu_percent": 53.81, + "ram_percent": 47.7, + "ram_used_gb": 30.335922241210938, + "gpu_memory_used": 1243.0, + "relative_time": 33.502938985824585 + }, + { + "timestamp": "2025-01-04T01:31:01.497496", + "cpu_percent": 51.0, + "ram_percent": 47.7, + "ram_used_gb": 30.325199127197266, + "gpu_memory_used": 1243.0, + "relative_time": 34.584938526153564 + }, + { + "timestamp": "2025-01-04T01:31:02.583134", + "cpu_percent": 49.26, + "ram_percent": 47.6, + "ram_used_gb": 30.30097198486328, + "gpu_memory_used": 1243.0, + "relative_time": 35.680947065353394 + }, + { + "timestamp": "2025-01-04T01:31:03.686381", + "cpu_percent": 48.91, + "ram_percent": 47.6, + "ram_used_gb": 30.300418853759766, + "gpu_memory_used": 1243.0, + "relative_time": 36.786722898483276 + }, + { + "timestamp": "2025-01-04T01:31:04.786497", + "cpu_percent": 48.69, + "ram_percent": 47.6, + "ram_used_gb": 30.29620361328125, + "gpu_memory_used": 1243.0, + "relative_time": 37.90794491767883 + }, + { + "timestamp": "2025-01-04T01:31:05.908563", + "cpu_percent": 50.43, + "ram_percent": 47.6, + "ram_used_gb": 30.29269027709961, + "gpu_memory_used": 1243.0, + "relative_time": 39.01517176628113 + }, + { + "timestamp": "2025-01-04T01:31:07.014496", + "cpu_percent": 48.22, + "ram_percent": 47.6, + "ram_used_gb": 30.298015594482422, + "gpu_memory_used": 1243.0, + "relative_time": 40.118446826934814 + }, + { + "timestamp": "2025-01-04T01:31:08.120066", + "cpu_percent": 47.47, + "ram_percent": 47.7, + "ram_used_gb": 30.312705993652344, + "gpu_memory_used": 1243.0, + "relative_time": 41.22802424430847 + }, + { + "timestamp": "2025-01-04T01:31:09.225367", + "cpu_percent": 41.09, + "ram_percent": 47.7, + "ram_used_gb": 30.34886932373047, + "gpu_memory_used": 1244.0, + "relative_time": 42.34174656867981 + }, + { + "timestamp": "2025-01-04T01:31:10.339308", + "cpu_percent": 44.12, + "ram_percent": 47.7, + "ram_used_gb": 30.353790283203125, + "gpu_memory_used": 1245.0, + "relative_time": 43.44456744194031 + }, + { + "timestamp": "2025-01-04T01:31:11.443944", + "cpu_percent": 48.99, + "ram_percent": 47.7, + "ram_used_gb": 30.34658432006836, + "gpu_memory_used": 1245.0, + "relative_time": 44.53658318519592 + }, + { + "timestamp": "2025-01-04T01:31:12.533026", + "cpu_percent": 47.62, + "ram_percent": 47.7, + "ram_used_gb": 30.318241119384766, + "gpu_memory_used": 1245.0, + "relative_time": 45.6171441078186 + }, + { + "timestamp": "2025-01-04T01:31:13.617044", + "cpu_percent": 49.3, + "ram_percent": 47.7, + "ram_used_gb": 30.318588256835938, + "gpu_memory_used": 1245.0, + "relative_time": 46.71653604507446 + }, + { + "timestamp": "2025-01-04T01:31:14.718976", + "cpu_percent": 48.42, + "ram_percent": 47.7, + "ram_used_gb": 30.316349029541016, + "gpu_memory_used": 1239.0, + "relative_time": 47.80844783782959 + }, + { + "timestamp": "2025-01-04T01:31:15.805079", + "cpu_percent": 47.56, + "ram_percent": 47.6, + "ram_used_gb": 30.30520248413086, + "gpu_memory_used": 1239.0, + "relative_time": 48.90499949455261 + }, + { + "timestamp": "2025-01-04T01:31:16.902878", + "cpu_percent": 49.11, + "ram_percent": 47.6, + "ram_used_gb": 30.306812286376953, + "gpu_memory_used": 1232.0, + "relative_time": 50.034260749816895 + }, + { + "timestamp": "2025-01-04T01:31:18.035723", + "cpu_percent": 45.81, + "ram_percent": 47.7, + "ram_used_gb": 30.32524871826172, + "gpu_memory_used": 1237.0, + "relative_time": 51.1371693611145 + }, + { + "timestamp": "2025-01-04T01:31:19.143169", + "cpu_percent": 49.94, + "ram_percent": 47.7, + "ram_used_gb": 30.323795318603516, + "gpu_memory_used": 1237.0, + "relative_time": 52.227344274520874 + }, + { + "timestamp": "2025-01-04T01:31:20.230256", + "cpu_percent": 39.57, + "ram_percent": 47.7, + "ram_used_gb": 30.330493927001953, + "gpu_memory_used": 1237.0, + "relative_time": 53.34033155441284 + }, + { + "timestamp": "2025-01-04T01:31:21.331797", + "cpu_percent": 44.34, + "ram_percent": 47.7, + "ram_used_gb": 30.330425262451172, + "gpu_memory_used": 1237.0, + "relative_time": 54.45246958732605 + }, + { + "timestamp": "2025-01-04T01:31:22.450663", + "cpu_percent": 46.87, + "ram_percent": 47.6, + "ram_used_gb": 30.3084716796875, + "gpu_memory_used": 1237.0, + "relative_time": 55.55728077888489 + }, + { + "timestamp": "2025-01-04T01:31:23.550691", + "cpu_percent": 49.88, + "ram_percent": 47.6, + "ram_used_gb": 30.309173583984375, + "gpu_memory_used": 1243.0, + "relative_time": 56.65515089035034 + }, + { + "timestamp": "2025-01-04T01:31:24.650939", + "cpu_percent": 51.21, + "ram_percent": 47.6, + "ram_used_gb": 30.30620574951172, + "gpu_memory_used": 1243.0, + "relative_time": 57.726617097854614 + }, + { + "timestamp": "2025-01-04T01:31:25.728955", + "cpu_percent": 45.22, + "ram_percent": 47.6, + "ram_used_gb": 30.291912078857422, + "gpu_memory_used": 1243.0, + "relative_time": 58.82792663574219 + }, + { + "timestamp": "2025-01-04T01:31:26.829490", + "cpu_percent": 48.86, + "ram_percent": 47.6, + "ram_used_gb": 30.289695739746094, + "gpu_memory_used": 1243.0, + "relative_time": 59.93786025047302 + }, + { + "timestamp": "2025-01-04T01:31:27.937071", + "cpu_percent": 45.69, + "ram_percent": 47.6, + "ram_used_gb": 30.302818298339844, + "gpu_memory_used": 1243.0, + "relative_time": 61.05047869682312 + }, + { + "timestamp": "2025-01-04T01:31:29.044046", + "cpu_percent": 51.09, + "ram_percent": 47.6, + "ram_used_gb": 30.307464599609375, + "gpu_memory_used": 1243.0, + "relative_time": 62.159112215042114 + }, + { + "timestamp": "2025-01-04T01:31:30.162426", + "cpu_percent": 47.04, + "ram_percent": 47.7, + "ram_used_gb": 30.32668685913086, + "gpu_memory_used": 1243.0, + "relative_time": 63.249592542648315 + }, + { + "timestamp": "2025-01-04T01:31:31.251755", + "cpu_percent": 45.32, + "ram_percent": 47.7, + "ram_used_gb": 30.330463409423828, + "gpu_memory_used": 1243.0, + "relative_time": 64.35896062850952 + }, + { + "timestamp": "2025-01-04T01:31:32.362284", + "cpu_percent": 47.2, + "ram_percent": 47.7, + "ram_used_gb": 30.314319610595703, + "gpu_memory_used": 1239.0, + "relative_time": 65.4672338962555 + }, + { + "timestamp": "2025-01-04T01:31:33.468921", + "cpu_percent": 48.94, + "ram_percent": 47.6, + "ram_used_gb": 30.308246612548828, + "gpu_memory_used": 1243.0, + "relative_time": 66.5955581665039 + }, + { + "timestamp": "2025-01-04T01:31:34.594176", + "cpu_percent": 47.88, + "ram_percent": 47.6, + "ram_used_gb": 30.29806137084961, + "gpu_memory_used": 1243.0, + "relative_time": 67.68029594421387 + }, + { + "timestamp": "2025-01-04T01:31:35.682260", + "cpu_percent": 45.92, + "ram_percent": 47.6, + "ram_used_gb": 30.299114227294922, + "gpu_memory_used": 1243.0, + "relative_time": 68.7970290184021 + }, + { + "timestamp": "2025-01-04T01:31:36.802433", + "cpu_percent": 51.07, + "ram_percent": 47.6, + "ram_used_gb": 30.29195785522461, + "gpu_memory_used": 1243.0, + "relative_time": 69.92168736457825 + }, + { + "timestamp": "2025-01-04T01:31:37.926464", + "cpu_percent": 47.29, + "ram_percent": 47.7, + "ram_used_gb": 30.324363708496094, + "gpu_memory_used": 1243.0, + "relative_time": 71.05467820167542 + }, + { + "timestamp": "2025-01-04T01:31:39.059936", + "cpu_percent": 48.91, + "ram_percent": 47.7, + "ram_used_gb": 30.32428741455078, + "gpu_memory_used": 1243.0, + "relative_time": 72.14405465126038 + }, + { + "timestamp": "2025-01-04T01:31:40.142859", + "cpu_percent": 44.66, + "ram_percent": 47.7, + "ram_used_gb": 30.33354949951172, + "gpu_memory_used": 1243.0, + "relative_time": 73.25559496879578 + }, + { + "timestamp": "2025-01-04T01:31:41.254868", + "cpu_percent": 48.98, + "ram_percent": 47.7, + "ram_used_gb": 30.344337463378906, + "gpu_memory_used": 1237.0, + "relative_time": 74.35676956176758 + }, + { + "timestamp": "2025-01-04T01:31:42.354977", + "cpu_percent": 50.79, + "ram_percent": 47.7, + "ram_used_gb": 30.322650909423828, + "gpu_memory_used": 1237.0, + "relative_time": 75.43929266929626 + }, + { + "timestamp": "2025-01-04T01:31:43.432869", + "cpu_percent": 45.86, + "ram_percent": 47.7, + "ram_used_gb": 30.316268920898438, + "gpu_memory_used": 1237.0, + "relative_time": 76.53794598579407 + }, + { + "timestamp": "2025-01-04T01:31:44.535917", + "cpu_percent": 47.22, + "ram_percent": 47.6, + "ram_used_gb": 30.308757781982422, + "gpu_memory_used": 1237.0, + "relative_time": 77.6620762348175 + }, + { + "timestamp": "2025-01-04T01:31:45.666281", + "cpu_percent": 51.06, + "ram_percent": 47.6, + "ram_used_gb": 30.307342529296875, + "gpu_memory_used": 1237.0, + "relative_time": 78.77155900001526 + }, + { + "timestamp": "2025-01-04T01:31:46.771605", + "cpu_percent": 47.82, + "ram_percent": 47.6, + "ram_used_gb": 30.298141479492188, + "gpu_memory_used": 1237.0, + "relative_time": 79.87201809883118 + }, + { + "timestamp": "2025-01-04T01:31:47.874817", + "cpu_percent": 44.51, + "ram_percent": 47.7, + "ram_used_gb": 30.322750091552734, + "gpu_memory_used": 1243.0, + "relative_time": 80.97521829605103 + }, + { + "timestamp": "2025-01-04T01:31:48.983338", + "cpu_percent": 47.69, + "ram_percent": 47.7, + "ram_used_gb": 30.3226318359375, + "gpu_memory_used": 1243.0, + "relative_time": 82.09707593917847 + }, + { + "timestamp": "2025-01-04T01:31:50.102541", + "cpu_percent": 42.36, + "ram_percent": 47.7, + "ram_used_gb": 30.32965087890625, + "gpu_memory_used": 1243.0, + "relative_time": 83.20944809913635 + }, + { + "timestamp": "2025-01-04T01:31:51.204766", + "cpu_percent": 45.87, + "ram_percent": 47.7, + "ram_used_gb": 30.32353973388672, + "gpu_memory_used": 1243.0, + "relative_time": 84.31531429290771 + }, + { + "timestamp": "2025-01-04T01:31:52.310873", + "cpu_percent": 50.01, + "ram_percent": 47.6, + "ram_used_gb": 30.296016693115234, + "gpu_memory_used": 1247.0, + "relative_time": 85.4254515171051 + }, + { + "timestamp": "2025-01-04T01:31:53.429342", + "cpu_percent": 49.65, + "ram_percent": 47.6, + "ram_used_gb": 30.306453704833984, + "gpu_memory_used": 1246.0, + "relative_time": 86.51991653442383 + }, + { + "timestamp": "2025-01-04T01:31:54.517894", + "cpu_percent": 47.29, + "ram_percent": 47.6, + "ram_used_gb": 30.30263900756836, + "gpu_memory_used": 1245.0, + "relative_time": 87.60364723205566 + }, + { + "timestamp": "2025-01-04T01:31:55.602848", + "cpu_percent": 47.48, + "ram_percent": 47.6, + "ram_used_gb": 30.303203582763672, + "gpu_memory_used": 1245.0, + "relative_time": 88.68531346321106 + }, + { + "timestamp": "2025-01-04T01:31:56.677895", + "cpu_percent": 46.74, + "ram_percent": 47.6, + "ram_used_gb": 30.29749298095703, + "gpu_memory_used": 1245.0, + "relative_time": 89.78639531135559 + }, + { + "timestamp": "2025-01-04T01:31:57.794084", + "cpu_percent": 43.92, + "ram_percent": 47.7, + "ram_used_gb": 30.313438415527344, + "gpu_memory_used": 1244.0, + "relative_time": 90.89922308921814 + }, + { + "timestamp": "2025-01-04T01:31:58.901464", + "cpu_percent": 48.88, + "ram_percent": 47.7, + "ram_used_gb": 30.32254409790039, + "gpu_memory_used": 1244.0, + "relative_time": 91.96823143959045 + }, + { + "timestamp": "2025-01-04T01:31:59.972227", + "cpu_percent": 38.89, + "ram_percent": 47.7, + "ram_used_gb": 30.32897186279297, + "gpu_memory_used": 1245.0, + "relative_time": 93.08689904212952 + }, + { + "timestamp": "2025-01-04T01:32:01.089013", + "cpu_percent": 49.22, + "ram_percent": 47.7, + "ram_used_gb": 30.328304290771484, + "gpu_memory_used": 1250.0, + "relative_time": 94.20951867103577 + }, + { + "timestamp": "2025-01-04T01:32:02.202304", + "cpu_percent": 46.56, + "ram_percent": 47.6, + "ram_used_gb": 30.29920196533203, + "gpu_memory_used": 1250.0, + "relative_time": 95.29210877418518 + }, + { + "timestamp": "2025-01-04T01:32:03.292108", + "cpu_percent": 46.39, + "ram_percent": 47.6, + "ram_used_gb": 30.308143615722656, + "gpu_memory_used": 1250.0, + "relative_time": 96.40629982948303 + }, + { + "timestamp": "2025-01-04T01:32:04.402400", + "cpu_percent": 49.88, + "ram_percent": 47.6, + "ram_used_gb": 30.310047149658203, + "gpu_memory_used": 1250.0, + "relative_time": 97.51973557472229 + }, + { + "timestamp": "2025-01-04T01:32:05.513450", + "cpu_percent": 53.28, + "ram_percent": 47.6, + "ram_used_gb": 30.30374526977539, + "gpu_memory_used": 1249.0, + "relative_time": 98.62612318992615 + }, + { + "timestamp": "2025-01-04T01:32:06.631627", + "cpu_percent": 44.65, + "ram_percent": 47.6, + "ram_used_gb": 30.30333709716797, + "gpu_memory_used": 1242.0, + "relative_time": 99.73457670211792 + }, + { + "timestamp": "2025-01-04T01:32:07.736449", + "cpu_percent": 50.93, + "ram_percent": 47.7, + "ram_used_gb": 30.33118438720703, + "gpu_memory_used": 1242.0, + "relative_time": 100.85807871818542 + }, + { + "timestamp": "2025-01-04T01:32:08.860429", + "cpu_percent": 62.71, + "ram_percent": 47.8, + "ram_used_gb": 30.41672134399414, + "gpu_memory_used": 1244.0, + "relative_time": 102.08941197395325 + }, + { + "timestamp": "2025-01-04T01:32:10.080974", + "cpu_percent": 96.29, + "ram_percent": 47.9, + "ram_used_gb": 30.45757293701172, + "gpu_memory_used": 1245.0, + "relative_time": 103.18154048919678 + }, + { + "timestamp": "2025-01-04T01:32:11.187912", + "cpu_percent": 49.09, + "ram_percent": 47.9, + "ram_used_gb": 30.445499420166016, + "gpu_memory_used": 1245.0, + "relative_time": 104.30198311805725 + }, + { + "timestamp": "2025-01-04T01:32:12.306213", + "cpu_percent": 51.15, + "ram_percent": 47.8, + "ram_used_gb": 30.42266845703125, + "gpu_memory_used": 1240.0, + "relative_time": 105.43745422363281 + }, + { + "timestamp": "2025-01-04T01:32:13.437791", + "cpu_percent": 47.79, + "ram_percent": 47.8, + "ram_used_gb": 30.40296173095703, + "gpu_memory_used": 1236.0, + "relative_time": 106.55609393119812 + }, + { + "timestamp": "2025-01-04T01:32:14.548441", + "cpu_percent": 39.41, + "ram_percent": 47.8, + "ram_used_gb": 30.406475067138672, + "gpu_memory_used": 1244.0, + "relative_time": 107.67082047462463 + }, + { + "timestamp": "2025-01-04T01:32:15.666526", + "cpu_percent": 77.07, + "ram_percent": 47.8, + "ram_used_gb": 30.424407958984375, + "gpu_memory_used": 1247.0, + "relative_time": 108.7851665019989 + }, + { + "timestamp": "2025-01-04T01:32:16.780793", + "cpu_percent": 49.13, + "ram_percent": 47.8, + "ram_used_gb": 30.429065704345703, + "gpu_memory_used": 1246.0, + "relative_time": 109.88107633590698 + }, + { + "timestamp": "2025-01-04T01:32:17.879071", + "cpu_percent": 82.96, + "ram_percent": 47.8, + "ram_used_gb": 30.428447723388672, + "gpu_memory_used": 1281.0, + "relative_time": 111.02328372001648 + }, + { + "timestamp": "2025-01-04T01:32:19.026978", + "cpu_percent": 74.64, + "ram_percent": 47.8, + "ram_used_gb": 30.430500030517578, + "gpu_memory_used": 1281.0, + "relative_time": 112.15347504615784 + }, + { + "timestamp": "2025-01-04T01:32:20.156784", + "cpu_percent": 76.94, + "ram_percent": 47.8, + "ram_used_gb": 30.40774917602539, + "gpu_memory_used": 1274.0, + "relative_time": 113.31317591667175 + }, + { + "timestamp": "2025-01-04T01:32:21.310871", + "cpu_percent": 69.52, + "ram_percent": 47.8, + "ram_used_gb": 30.429115295410156, + "gpu_memory_used": 1273.0, + "relative_time": 114.42301273345947 + }, + { + "timestamp": "2025-01-04T01:32:22.424508", + "cpu_percent": 74.47, + "ram_percent": 47.8, + "ram_used_gb": 30.403045654296875, + "gpu_memory_used": 1274.0, + "relative_time": 115.52539491653442 + }, + { + "timestamp": "2025-01-04T01:32:23.525673", + "cpu_percent": 67.2, + "ram_percent": 47.9, + "ram_used_gb": 30.484474182128906, + "gpu_memory_used": 1273.0, + "relative_time": 116.61319661140442 + }, + { + "timestamp": "2025-01-04T01:32:24.613302", + "cpu_percent": 57.41, + "ram_percent": 47.7, + "ram_used_gb": 30.35879135131836, + "gpu_memory_used": 1270.0, + "relative_time": 117.72619676589966 + }, + { + "timestamp": "2025-01-04T01:32:25.730732", + "cpu_percent": 45.97, + "ram_percent": 47.7, + "ram_used_gb": 30.347335815429688, + "gpu_memory_used": 1270.0, + "relative_time": 118.84320116043091 + }, + { + "timestamp": "2025-01-04T01:32:26.845420", + "cpu_percent": 47.74, + "ram_percent": 47.7, + "ram_used_gb": 30.354007720947266, + "gpu_memory_used": 1265.0, + "relative_time": 119.96074485778809 + }, + { + "timestamp": "2025-01-04T01:32:27.964248", + "cpu_percent": 60.0, + "ram_percent": 47.7, + "ram_used_gb": 30.3675537109375, + "gpu_memory_used": 1258.0, + "relative_time": 121.09439873695374 + }, + { + "timestamp": "2025-01-04T01:32:29.094542", + "cpu_percent": 54.46, + "ram_percent": 47.7, + "ram_used_gb": 30.367305755615234, + "gpu_memory_used": 1230.0, + "relative_time": 122.24102592468262 + }, + { + "timestamp": "2025-01-04T01:32:30.244200", + "cpu_percent": 56.21, + "ram_percent": 47.7, + "ram_used_gb": 30.364959716796875, + "gpu_memory_used": 1230.0, + "relative_time": 123.34450554847717 + }, + { + "timestamp": "2025-01-04T01:32:31.346103", + "cpu_percent": 40.66, + "ram_percent": 47.8, + "ram_used_gb": 30.420738220214844, + "gpu_memory_used": 1235.0, + "relative_time": 124.46777892112732 + }, + { + "timestamp": "2025-01-04T01:32:32.463710", + "cpu_percent": 51.66, + "ram_percent": 47.8, + "ram_used_gb": 30.396198272705078, + "gpu_memory_used": 1235.0, + "relative_time": 125.57916116714478 + }, + { + "timestamp": "2025-01-04T01:32:33.580811", + "cpu_percent": 49.68, + "ram_percent": 47.8, + "ram_used_gb": 30.40151596069336, + "gpu_memory_used": 1236.0, + "relative_time": 126.6768786907196 + }, + { + "timestamp": "2025-01-04T01:32:34.668960", + "cpu_percent": 49.09, + "ram_percent": 47.8, + "ram_used_gb": 30.380916595458984, + "gpu_memory_used": 1236.0, + "relative_time": 127.73568296432495 + }, + { + "timestamp": "2025-01-04T01:32:35.729484", + "cpu_percent": 48.53, + "ram_percent": 47.8, + "ram_used_gb": 30.385761260986328, + "gpu_memory_used": 1236.0, + "relative_time": 128.85891699790955 + }, + { + "timestamp": "2025-01-04T01:32:36.849812", + "cpu_percent": 52.39, + "ram_percent": 47.8, + "ram_used_gb": 30.414752960205078, + "gpu_memory_used": 1235.0, + "relative_time": 129.9150390625 + }, + { + "timestamp": "2025-01-04T01:32:37.919974", + "cpu_percent": 46.89, + "ram_percent": 47.9, + "ram_used_gb": 30.450218200683594, + "gpu_memory_used": 1235.0, + "relative_time": 131.00502228736877 + }, + { + "timestamp": "2025-01-04T01:32:39.008115", + "cpu_percent": 46.59, + "ram_percent": 47.8, + "ram_used_gb": 30.436458587646484, + "gpu_memory_used": 1235.0, + "relative_time": 132.10191130638123 + }, + { + "timestamp": "2025-01-04T01:32:40.095463", + "cpu_percent": 45.76, + "ram_percent": 47.9, + "ram_used_gb": 30.443893432617188, + "gpu_memory_used": 1224.0, + "relative_time": 133.26839780807495 + }, + { + "timestamp": "2025-01-04T01:32:41.265737", + "cpu_percent": 56.94, + "ram_percent": 47.8, + "ram_used_gb": 30.41216278076172, + "gpu_memory_used": 1224.0, + "relative_time": 134.32926607131958 + }, + { + "timestamp": "2025-01-04T01:32:42.321015", + "cpu_percent": 40.36, + "ram_percent": 47.8, + "ram_used_gb": 30.386669158935547, + "gpu_memory_used": 1224.0, + "relative_time": 135.40537309646606 + }, + { + "timestamp": "2025-01-04T01:32:43.400382", + "cpu_percent": 44.51, + "ram_percent": 47.8, + "ram_used_gb": 30.39049530029297, + "gpu_memory_used": 1224.0, + "relative_time": 136.52469301223755 + }, + { + "timestamp": "2025-01-04T01:32:44.524119", + "cpu_percent": 50.29, + "ram_percent": 47.8, + "ram_used_gb": 30.38903045654297, + "gpu_memory_used": 1220.0, + "relative_time": 137.60522270202637 + }, + { + "timestamp": "2025-01-04T01:32:45.599869", + "cpu_percent": 51.69, + "ram_percent": 47.8, + "ram_used_gb": 30.378681182861328, + "gpu_memory_used": 1213.0, + "relative_time": 138.7130560874939 + }, + { + "timestamp": "2025-01-04T01:32:46.711674", + "cpu_percent": 49.55, + "ram_percent": 47.7, + "ram_used_gb": 30.34076690673828, + "gpu_memory_used": 1213.0, + "relative_time": 139.8105547428131 + }, + { + "timestamp": "2025-01-04T01:32:47.813091", + "cpu_percent": 44.5, + "ram_percent": 47.7, + "ram_used_gb": 30.343746185302734, + "gpu_memory_used": 1213.0, + "relative_time": 140.91643166542053 + }, + { + "timestamp": "2025-01-04T01:32:48.917679", + "cpu_percent": 43.76, + "ram_percent": 47.7, + "ram_used_gb": 30.354793548583984, + "gpu_memory_used": 1213.0, + "relative_time": 142.04264283180237 + }, + { + "timestamp": "2025-01-04T01:32:50.047653", + "cpu_percent": 48.41, + "ram_percent": 47.7, + "ram_used_gb": 30.361080169677734, + "gpu_memory_used": 1219.0, + "relative_time": 143.14667677879333 + }, + { + "timestamp": "2025-01-04T01:32:51.153490", + "cpu_percent": 57.01, + "ram_percent": 47.9, + "ram_used_gb": 30.4390869140625, + "gpu_memory_used": 1232.0, + "relative_time": 144.2709481716156 + }, + { + "timestamp": "2025-01-04T01:32:52.272196", + "cpu_percent": 54.69, + "ram_percent": 47.9, + "ram_used_gb": 30.46664047241211, + "gpu_memory_used": 1236.0, + "relative_time": 145.36933588981628 + }, + { + "timestamp": "2025-01-04T01:32:53.374563", + "cpu_percent": 51.37, + "ram_percent": 47.9, + "ram_used_gb": 30.487388610839844, + "gpu_memory_used": 1245.0, + "relative_time": 146.4400930404663 + }, + { + "timestamp": "2025-01-04T01:32:54.445178", + "cpu_percent": 47.76, + "ram_percent": 47.9, + "ram_used_gb": 30.475635528564453, + "gpu_memory_used": 1241.0, + "relative_time": 147.5295627117157 + }, + { + "timestamp": "2025-01-04T01:32:55.520495", + "cpu_percent": 49.24, + "ram_percent": 47.9, + "ram_used_gb": 30.47634506225586, + "gpu_memory_used": 1236.0, + "relative_time": 148.5926468372345 + }, + { + "timestamp": "2025-01-04T01:32:56.591995", + "cpu_percent": 53.63, + "ram_percent": 47.9, + "ram_used_gb": 30.49687957763672, + "gpu_memory_used": 1251.0, + "relative_time": 149.72928547859192 + }, + { + "timestamp": "2025-01-04T01:32:57.727346", + "cpu_percent": 65.04, + "ram_percent": 48.1, + "ram_used_gb": 30.59111785888672, + "gpu_memory_used": 1251.0, + "relative_time": 150.86237502098083 + }, + { + "timestamp": "2025-01-04T01:32:58.862812", + "cpu_percent": 71.05, + "ram_percent": 48.2, + "ram_used_gb": 30.633731842041016, + "gpu_memory_used": 1263.0, + "relative_time": 152.03348207473755 + }, + { + "timestamp": "2025-01-04T01:33:00.037915", + "cpu_percent": 85.87, + "ram_percent": 48.2, + "ram_used_gb": 30.68001937866211, + "gpu_memory_used": 1253.0, + "relative_time": 153.1551034450531 + }, + { + "timestamp": "2025-01-04T01:33:01.158119", + "cpu_percent": 59.8, + "ram_percent": 48.2, + "ram_used_gb": 30.69198989868164, + "gpu_memory_used": 1252.0, + "relative_time": 154.2606840133667 + }, + { + "timestamp": "2025-01-04T01:33:02.262390", + "cpu_percent": 45.33, + "ram_percent": 48.3, + "ram_used_gb": 30.743839263916016, + "gpu_memory_used": 1252.0, + "relative_time": 155.3663365840912 + }, + { + "timestamp": "2025-01-04T01:33:03.369936", + "cpu_percent": 35.41, + "ram_percent": 48.2, + "ram_used_gb": 30.68472671508789, + "gpu_memory_used": 1252.0, + "relative_time": 156.4842345714569 + }, + { + "timestamp": "2025-01-04T01:33:04.488089", + "cpu_percent": 47.22, + "ram_percent": 48.4, + "ram_used_gb": 30.78485870361328, + "gpu_memory_used": 1254.0, + "relative_time": 157.58868670463562 + }, + { + "timestamp": "2025-01-04T01:33:05.592303", + "cpu_percent": 36.14, + "ram_percent": 48.5, + "ram_used_gb": 30.87320327758789, + "gpu_memory_used": 1254.0, + "relative_time": 158.71629786491394 + }, + { + "timestamp": "2025-01-04T01:33:06.721317", + "cpu_percent": 38.46, + "ram_percent": 48.2, + "ram_used_gb": 30.668170928955078, + "gpu_memory_used": 1254.0, + "relative_time": 159.82655477523804 + }, + { + "timestamp": "2025-01-04T01:33:07.827187", + "cpu_percent": 35.81, + "ram_percent": 48.4, + "ram_used_gb": 30.777912139892578, + "gpu_memory_used": 1254.0, + "relative_time": 160.94229197502136 + }, + { + "timestamp": "2025-01-04T01:33:08.943035", + "cpu_percent": 39.24, + "ram_percent": 48.5, + "ram_used_gb": 30.86941146850586, + "gpu_memory_used": 1254.0, + "relative_time": 162.06378889083862 + }, + { + "timestamp": "2025-01-04T01:33:10.063208", + "cpu_percent": 51.52, + "ram_percent": 48.1, + "ram_used_gb": 30.624229431152344, + "gpu_memory_used": 1254.0, + "relative_time": 163.16198420524597 + }, + { + "timestamp": "2025-01-04T01:33:11.163067", + "cpu_percent": 48.99, + "ram_percent": 48.1, + "ram_used_gb": 30.612281799316406, + "gpu_memory_used": 1254.0, + "relative_time": 164.26579809188843 + }, + { + "timestamp": "2025-01-04T01:33:12.266417", + "cpu_percent": 46.27, + "ram_percent": 48.1, + "ram_used_gb": 30.584861755371094, + "gpu_memory_used": 1252.0, + "relative_time": 165.35981583595276 + }, + { + "timestamp": "2025-01-04T01:33:13.354673", + "cpu_percent": 45.71, + "ram_percent": 48.1, + "ram_used_gb": 30.582279205322266, + "gpu_memory_used": 1252.0, + "relative_time": 166.45263361930847 + }, + { + "timestamp": "2025-01-04T01:33:14.447308", + "cpu_percent": 48.69, + "ram_percent": 48.1, + "ram_used_gb": 30.584793090820312, + "gpu_memory_used": 1252.0, + "relative_time": 167.54857754707336 + }, + { + "timestamp": "2025-01-04T01:33:15.552042", + "cpu_percent": 48.66, + "ram_percent": 48.1, + "ram_used_gb": 30.580883026123047, + "gpu_memory_used": 1252.0, + "relative_time": 168.659592628479 + }, + { + "timestamp": "2025-01-04T01:33:16.653015", + "cpu_percent": 50.37, + "ram_percent": 48.1, + "ram_used_gb": 30.573726654052734, + "gpu_memory_used": 1252.0, + "relative_time": 169.7969992160797 + }, + { + "timestamp": "2025-01-04T01:33:17.802854", + "cpu_percent": 49.45, + "ram_percent": 48.1, + "ram_used_gb": 30.587318420410156, + "gpu_memory_used": 1252.0, + "relative_time": 170.891606092453 + }, + { + "timestamp": "2025-01-04T01:33:18.893192", + "cpu_percent": 50.16, + "ram_percent": 48.1, + "ram_used_gb": 30.5953369140625, + "gpu_memory_used": 1252.0, + "relative_time": 172.0133557319641 + }, + { + "timestamp": "2025-01-04T01:33:20.008593", + "cpu_percent": 47.57, + "ram_percent": 48.1, + "ram_used_gb": 30.6124267578125, + "gpu_memory_used": 1252.0, + "relative_time": 173.0913679599762 + }, + { + "timestamp": "2025-01-04T01:33:21.097576", + "cpu_percent": 44.32, + "ram_percent": 48.1, + "ram_used_gb": 30.584686279296875, + "gpu_memory_used": 1252.0, + "relative_time": 174.20030999183655 + }, + { + "timestamp": "2025-01-04T01:33:22.201335", + "cpu_percent": 49.01, + "ram_percent": 48.0, + "ram_used_gb": 30.547630310058594, + "gpu_memory_used": 1252.0, + "relative_time": 175.30235862731934 + }, + { + "timestamp": "2025-01-04T01:33:23.306131", + "cpu_percent": 43.7, + "ram_percent": 48.0, + "ram_used_gb": 30.559757232666016, + "gpu_memory_used": 1251.0, + "relative_time": 176.40550017356873 + }, + { + "timestamp": "2025-01-04T01:33:24.408896", + "cpu_percent": 48.77, + "ram_percent": 48.0, + "ram_used_gb": 30.5601806640625, + "gpu_memory_used": 1251.0, + "relative_time": 177.4984576702118 + }, + { + "timestamp": "2025-01-04T01:33:25.496705", + "cpu_percent": 50.56, + "ram_percent": 48.0, + "ram_used_gb": 30.556926727294922, + "gpu_memory_used": 1251.0, + "relative_time": 178.58782863616943 + }, + { + "timestamp": "2025-01-04T01:33:26.588438", + "cpu_percent": 47.76, + "ram_percent": 48.0, + "ram_used_gb": 30.53600311279297, + "gpu_memory_used": 1251.0, + "relative_time": 179.67969870567322 + }, + { + "timestamp": "2025-01-04T01:33:27.679807", + "cpu_percent": 49.0, + "ram_percent": 48.0, + "ram_used_gb": 30.540546417236328, + "gpu_memory_used": 1251.0, + "relative_time": 180.78388810157776 + }, + { + "timestamp": "2025-01-04T01:33:28.780263", + "cpu_percent": 49.25, + "ram_percent": 48.0, + "ram_used_gb": 30.55233383178711, + "gpu_memory_used": 1251.0, + "relative_time": 181.88185930252075 + }, + { + "timestamp": "2025-01-04T01:33:29.881869", + "cpu_percent": 47.08, + "ram_percent": 48.1, + "ram_used_gb": 30.56603240966797, + "gpu_memory_used": 1251.0, + "relative_time": 182.9666450023651 + }, + { + "timestamp": "2025-01-04T01:33:30.957821", + "cpu_percent": 45.77, + "ram_percent": 48.0, + "ram_used_gb": 30.559410095214844, + "gpu_memory_used": 1251.0, + "relative_time": 184.05338644981384 + }, + { + "timestamp": "2025-01-04T01:33:32.047377", + "cpu_percent": 50.79, + "ram_percent": 48.0, + "ram_used_gb": 30.534175872802734, + "gpu_memory_used": 1251.0, + "relative_time": 185.17484974861145 + }, + { + "timestamp": "2025-01-04T01:33:33.167413", + "cpu_percent": 52.13, + "ram_percent": 48.0, + "ram_used_gb": 30.54046630859375, + "gpu_memory_used": 1266.0, + "relative_time": 186.23550605773926 + }, + { + "timestamp": "2025-01-04T01:33:34.226743", + "cpu_percent": 43.81, + "ram_percent": 48.0, + "ram_used_gb": 30.54621124267578, + "gpu_memory_used": 1266.0, + "relative_time": 187.30887961387634 + }, + { + "timestamp": "2025-01-04T01:33:35.303398", + "cpu_percent": 49.28, + "ram_percent": 48.0, + "ram_used_gb": 30.545230865478516, + "gpu_memory_used": 1264.0, + "relative_time": 188.40410709381104 + }, + { + "timestamp": "2025-01-04T01:33:36.405660", + "cpu_percent": 46.44, + "ram_percent": 48.0, + "ram_used_gb": 30.540679931640625, + "gpu_memory_used": 1264.0, + "relative_time": 189.47515082359314 + }, + { + "timestamp": "2025-01-04T01:33:37.469955", + "cpu_percent": 41.6, + "ram_percent": 48.0, + "ram_used_gb": 30.562320709228516, + "gpu_memory_used": 1264.0, + "relative_time": 190.56309294700623 + }, + { + "timestamp": "2025-01-04T01:33:38.556728", + "cpu_percent": 50.52, + "ram_percent": 48.0, + "ram_used_gb": 30.561084747314453, + "gpu_memory_used": 1264.0, + "relative_time": 191.66572499275208 + }, + { + "timestamp": "2025-01-04T01:33:39.665385", + "cpu_percent": 40.93, + "ram_percent": 48.1, + "ram_used_gb": 30.577682495117188, + "gpu_memory_used": 1264.0, + "relative_time": 192.76011109352112 + }, + { + "timestamp": "2025-01-04T01:33:40.754482", + "cpu_percent": 50.46, + "ram_percent": 48.1, + "ram_used_gb": 30.5740966796875, + "gpu_memory_used": 1262.0, + "relative_time": 193.90924453735352 + }, + { + "timestamp": "2025-01-04T01:33:41.903437", + "cpu_percent": 52.75, + "ram_percent": 48.1, + "ram_used_gb": 30.58869171142578, + "gpu_memory_used": 1258.0, + "relative_time": 195.0148274898529 + }, + { + "timestamp": "2025-01-04T01:33:43.008520", + "cpu_percent": 50.04, + "ram_percent": 48.0, + "ram_used_gb": 30.560386657714844, + "gpu_memory_used": 1258.0, + "relative_time": 196.12349009513855 + }, + { + "timestamp": "2025-01-04T01:33:44.129194", + "cpu_percent": 51.56, + "ram_percent": 48.1, + "ram_used_gb": 30.572277069091797, + "gpu_memory_used": 1258.0, + "relative_time": 197.20997285842896 + }, + { + "timestamp": "2025-01-04T01:33:45.212927", + "cpu_percent": 47.77, + "ram_percent": 48.0, + "ram_used_gb": 30.556873321533203, + "gpu_memory_used": 1258.0, + "relative_time": 198.29724264144897 + }, + { + "timestamp": "2025-01-04T01:33:46.288883", + "cpu_percent": 46.07, + "ram_percent": 48.0, + "ram_used_gb": 30.554439544677734, + "gpu_memory_used": 1258.0, + "relative_time": 199.39549779891968 + }, + { + "timestamp": "2025-01-04T01:33:47.403171", + "cpu_percent": 46.18, + "ram_percent": 48.0, + "ram_used_gb": 30.557025909423828, + "gpu_memory_used": 1258.0, + "relative_time": 200.50221276283264 + }, + { + "timestamp": "2025-01-04T01:33:48.495515", + "cpu_percent": 48.09, + "ram_percent": 48.0, + "ram_used_gb": 30.558856964111328, + "gpu_memory_used": 1267.0, + "relative_time": 201.62405467033386 + }, + { + "timestamp": "2025-01-04T01:33:49.630725", + "cpu_percent": 53.47, + "ram_percent": 48.1, + "ram_used_gb": 30.59896469116211, + "gpu_memory_used": 1283.0, + "relative_time": 202.70162987709045 + }, + { + "timestamp": "2025-01-04T01:33:50.709226", + "cpu_percent": 44.74, + "ram_percent": 48.1, + "ram_used_gb": 30.581470489501953, + "gpu_memory_used": 1281.0, + "relative_time": 203.78962469100952 + }, + { + "timestamp": "2025-01-04T01:33:51.782302", + "cpu_percent": 43.4, + "ram_percent": 48.1, + "ram_used_gb": 30.582977294921875, + "gpu_memory_used": 1282.0, + "relative_time": 204.87054562568665 + }, + { + "timestamp": "2025-01-04T01:33:52.868020", + "cpu_percent": 51.75, + "ram_percent": 48.0, + "ram_used_gb": 30.540206909179688, + "gpu_memory_used": 1282.0, + "relative_time": 205.95602416992188 + }, + { + "timestamp": "2025-01-04T01:33:53.956023", + "cpu_percent": 46.36, + "ram_percent": 48.0, + "ram_used_gb": 30.562763214111328, + "gpu_memory_used": 1282.0, + "relative_time": 207.06639337539673 + }, + { + "timestamp": "2025-01-04T01:33:55.064043", + "cpu_percent": 43.91, + "ram_percent": 48.0, + "ram_used_gb": 30.560302734375, + "gpu_memory_used": 1277.0, + "relative_time": 208.16699743270874 + }, + { + "timestamp": "2025-01-04T01:33:56.170674", + "cpu_percent": 50.01, + "ram_percent": 48.1, + "ram_used_gb": 30.576671600341797, + "gpu_memory_used": 1281.0, + "relative_time": 209.28660559654236 + }, + { + "timestamp": "2025-01-04T01:33:57.288316", + "cpu_percent": 50.51, + "ram_percent": 48.0, + "ram_used_gb": 30.551471710205078, + "gpu_memory_used": 1280.0, + "relative_time": 210.4030442237854 + }, + { + "timestamp": "2025-01-04T01:33:58.407032", + "cpu_percent": 49.43, + "ram_percent": 48.1, + "ram_used_gb": 30.576725006103516, + "gpu_memory_used": 1280.0, + "relative_time": 211.50494027137756 + }, + { + "timestamp": "2025-01-04T01:33:59.497806", + "cpu_percent": 46.68, + "ram_percent": 48.1, + "ram_used_gb": 30.59314727783203, + "gpu_memory_used": 1279.0, + "relative_time": 212.6002950668335 + }, + { + "timestamp": "2025-01-04T01:34:00.598484", + "cpu_percent": 57.44, + "ram_percent": 48.1, + "ram_used_gb": 30.60983657836914, + "gpu_memory_used": 1285.0, + "relative_time": 213.7150914669037 + }, + { + "timestamp": "2025-01-04T01:34:01.719968", + "cpu_percent": 54.58, + "ram_percent": 48.1, + "ram_used_gb": 30.586456298828125, + "gpu_memory_used": 1283.0, + "relative_time": 214.80932760238647 + }, + { + "timestamp": "2025-01-04T01:34:02.807573", + "cpu_percent": 61.69, + "ram_percent": 48.0, + "ram_used_gb": 30.53356170654297, + "gpu_memory_used": 1281.0, + "relative_time": 215.88946890830994 + }, + { + "timestamp": "2025-01-04T01:34:03.885672", + "cpu_percent": 49.46, + "ram_percent": 48.0, + "ram_used_gb": 30.517364501953125, + "gpu_memory_used": 1283.0, + "relative_time": 216.97114062309265 + }, + { + "timestamp": "2025-01-04T01:34:04.974449", + "cpu_percent": 42.69, + "ram_percent": 48.0, + "ram_used_gb": 30.527969360351562, + "gpu_memory_used": 1285.0, + "relative_time": 218.10192775726318 + }, + { + "timestamp": "2025-01-04T01:34:06.107947", + "cpu_percent": 54.87, + "ram_percent": 48.0, + "ram_used_gb": 30.51028823852539, + "gpu_memory_used": 1273.0, + "relative_time": 219.17600679397583 + }, + { + "timestamp": "2025-01-04T01:34:07.172153", + "cpu_percent": 45.42, + "ram_percent": 48.0, + "ram_used_gb": 30.509258270263672, + "gpu_memory_used": 1273.0, + "relative_time": 220.28902983665466 + }, + { + "timestamp": "2025-01-04T01:34:08.289623", + "cpu_percent": 52.75, + "ram_percent": 48.0, + "ram_used_gb": 30.52011489868164, + "gpu_memory_used": 1272.0, + "relative_time": 221.39960098266602 + }, + { + "timestamp": "2025-01-04T01:34:09.406158", + "cpu_percent": 52.53, + "ram_percent": 48.0, + "ram_used_gb": 30.52783966064453, + "gpu_memory_used": 1265.0, + "relative_time": 222.49749565124512 + }, + { + "timestamp": "2025-01-04T01:34:10.491042", + "cpu_percent": 56.49, + "ram_percent": 48.0, + "ram_used_gb": 30.540733337402344, + "gpu_memory_used": 1261.0, + "relative_time": 223.5777132511139 + }, + { + "timestamp": "2025-01-04T01:34:11.577710", + "cpu_percent": 44.25, + "ram_percent": 48.0, + "ram_used_gb": 30.531757354736328, + "gpu_memory_used": 1262.0, + "relative_time": 224.68288159370422 + }, + { + "timestamp": "2025-01-04T01:34:12.682455", + "cpu_percent": 47.56, + "ram_percent": 48.0, + "ram_used_gb": 30.50157928466797, + "gpu_memory_used": 1262.0, + "relative_time": 225.78287291526794 + }, + { + "timestamp": "2025-01-04T01:34:13.782976", + "cpu_percent": 48.52, + "ram_percent": 48.0, + "ram_used_gb": 30.507736206054688, + "gpu_memory_used": 1262.0, + "relative_time": 226.8910207748413 + }, + { + "timestamp": "2025-01-04T01:34:14.884200", + "cpu_percent": 49.89, + "ram_percent": 48.0, + "ram_used_gb": 30.50653076171875, + "gpu_memory_used": 1263.0, + "relative_time": 228.04418087005615 + }, + { + "timestamp": "2025-01-04T01:34:16.051189", + "cpu_percent": 49.34, + "ram_percent": 48.0, + "ram_used_gb": 30.504470825195312, + "gpu_memory_used": 1263.0, + "relative_time": 229.13680815696716 + }, + { + "timestamp": "2025-01-04T01:34:17.136588", + "cpu_percent": 47.8, + "ram_percent": 47.9, + "ram_used_gb": 30.496841430664062, + "gpu_memory_used": 1263.0, + "relative_time": 230.26778984069824 + }, + { + "timestamp": "2025-01-04T01:34:18.269616", + "cpu_percent": 48.23, + "ram_percent": 48.0, + "ram_used_gb": 30.50909423828125, + "gpu_memory_used": 1262.0, + "relative_time": 231.3880865573883 + }, + { + "timestamp": "2025-01-04T01:34:19.387759", + "cpu_percent": 42.46, + "ram_percent": 48.0, + "ram_used_gb": 30.525142669677734, + "gpu_memory_used": 1262.0, + "relative_time": 232.4770486354828 + }, + { + "timestamp": "2025-01-04T01:34:20.471629", + "cpu_percent": 44.17, + "ram_percent": 48.0, + "ram_used_gb": 30.535388946533203, + "gpu_memory_used": 1259.0, + "relative_time": 233.57954168319702 + }, + { + "timestamp": "2025-01-04T01:34:21.576615", + "cpu_percent": 45.36, + "ram_percent": 48.0, + "ram_used_gb": 30.529708862304688, + "gpu_memory_used": 1259.0, + "relative_time": 234.70528435707092 + }, + { + "timestamp": "2025-01-04T01:34:22.709825", + "cpu_percent": 52.14, + "ram_percent": 47.9, + "ram_used_gb": 30.490406036376953, + "gpu_memory_used": 1259.0, + "relative_time": 235.84367108345032 + }, + { + "timestamp": "2025-01-04T01:34:23.834912", + "cpu_percent": 49.39, + "ram_percent": 47.9, + "ram_used_gb": 30.49042510986328, + "gpu_memory_used": 1259.0, + "relative_time": 236.94777131080627 + }, + { + "timestamp": "2025-01-04T01:34:24.940884", + "cpu_percent": 51.84, + "ram_percent": 47.9, + "ram_used_gb": 30.489459991455078, + "gpu_memory_used": 1259.0, + "relative_time": 238.07107305526733 + }, + { + "timestamp": "2025-01-04T01:34:26.077527", + "cpu_percent": 49.55, + "ram_percent": 47.9, + "ram_used_gb": 30.488842010498047, + "gpu_memory_used": 1259.0, + "relative_time": 239.20314645767212 + }, + { + "timestamp": "2025-01-04T01:34:27.199360", + "cpu_percent": 47.71, + "ram_percent": 47.9, + "ram_used_gb": 30.49380874633789, + "gpu_memory_used": 1258.0, + "relative_time": 240.32860612869263 + }, + { + "timestamp": "2025-01-04T01:34:28.333600", + "cpu_percent": 48.61, + "ram_percent": 48.0, + "ram_used_gb": 30.503887176513672, + "gpu_memory_used": 1258.0, + "relative_time": 241.44983053207397 + }, + { + "timestamp": "2025-01-04T01:34:29.453855", + "cpu_percent": 51.01, + "ram_percent": 48.0, + "ram_used_gb": 30.512046813964844, + "gpu_memory_used": 1258.0, + "relative_time": 242.60703372955322 + }, + { + "timestamp": "2025-01-04T01:34:30.613699", + "cpu_percent": 53.89, + "ram_percent": 48.0, + "ram_used_gb": 30.522415161132812, + "gpu_memory_used": 1258.0, + "relative_time": 243.73219799995422 + }, + { + "timestamp": "2025-01-04T01:34:31.735503", + "cpu_percent": 21.25, + "ram_percent": 48.2, + "ram_used_gb": 30.68771743774414, + "gpu_memory_used": 1260.0, + "relative_time": 244.80069231987 + } + ], + "test_duration": 247.14976453781128 +} \ No newline at end of file diff --git a/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt new file mode 100644 index 0000000..541a304 --- /dev/null +++ b/examples/assorted_checks/benchmarks/output_data/cpu_benchmark_stats_8_4_par.txt @@ -0,0 +1,23 @@ +=== Benchmark Statistics (with correct RTF) === + +Total tokens processed: 1800 +Total audio generated (s): 568.53 +Total test duration (s): 244.10 +Average processing rate (tokens/s): 7.34 +Average RTF: 0.43 +Average Real Time Speed: 2.33 + +=== Per-chunk Stats === + +Average chunk size (tokens): 600.00 +Min chunk size (tokens): 300 +Max chunk size (tokens): 900 +Average processing time (s): 81.30 +Average output length (s): 189.51 + +=== Performance Ranges === + +Processing rate range (tokens/s): 7.21 - 7.47 +RTF range: 0.43x - 0.43x +Real Time Speed range: 2.33x - 2.33x + diff --git a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json new file mode 100644 index 0000000..ccac37e --- /dev/null +++ b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_results_rtf.json @@ -0,0 +1,1253 @@ +{ + "results": [ + { + "tokens": 150, + "processing_time": 1.86, + "output_length": 45.9, + "rtf": 0.04, + "elapsed_time": 1.92 + }, + { + "tokens": 300, + "processing_time": 3.08, + "output_length": 96.425, + "rtf": 0.03, + "elapsed_time": 5.06 + }, + { + "tokens": 450, + "processing_time": 4.4, + "output_length": 143.1, + "rtf": 0.03, + "elapsed_time": 9.53 + }, + { + "tokens": 600, + "processing_time": 6.47, + "output_length": 188.675, + "rtf": 0.03, + "elapsed_time": 16.06 + }, + { + "tokens": 750, + "processing_time": 8.32, + "output_length": 236.7, + "rtf": 0.04, + "elapsed_time": 24.45 + }, + { + "tokens": 900, + "processing_time": 8.92, + "output_length": 283.425, + "rtf": 0.03, + "elapsed_time": 33.45 + }, + { + "tokens": 2000, + "processing_time": 18.55, + "output_length": 624.325, + "rtf": 0.03, + "elapsed_time": 52.14 + }, + { + "tokens": 3000, + "processing_time": 23.98, + "output_length": 931.15, + "rtf": 0.03, + "elapsed_time": 76.32 + }, + { + "tokens": 4000, + "processing_time": 32.93, + "output_length": 1222.1, + "rtf": 0.03, + "elapsed_time": 109.53 + }, + { + "tokens": 5000, + "processing_time": 45.39, + "output_length": 1524.575, + "rtf": 0.03, + "elapsed_time": 155.23 + } + ], + "system_metrics": [ + { + "timestamp": "2025-01-04T02:37:52.172368", + "cpu_percent": 11.51, + "ram_percent": 52.8, + "ram_used_gb": 33.61172866821289, + "gpu_memory_used": 3216.0, + "relative_time": 0.08031892776489258 + }, + { + "timestamp": "2025-01-04T02:37:53.266071", + "cpu_percent": 15.33, + "ram_percent": 52.9, + "ram_used_gb": 33.678314208984375, + "gpu_memory_used": 3392.0, + "relative_time": 1.1673684120178223 + }, + { + "timestamp": "2025-01-04T02:37:54.352909", + "cpu_percent": 15.3, + "ram_percent": 53.0, + "ram_used_gb": 33.712764739990234, + "gpu_memory_used": 3667.0, + "relative_time": 2.253591537475586 + }, + { + "timestamp": "2025-01-04T02:37:55.439413", + "cpu_percent": 23.51, + "ram_percent": 52.7, + "ram_used_gb": 33.49789810180664, + "gpu_memory_used": 3662.0, + "relative_time": 3.3292760848999023 + }, + { + "timestamp": "2025-01-04T02:37:56.511211", + "cpu_percent": 14.69, + "ram_percent": 52.7, + "ram_used_gb": 33.494102478027344, + "gpu_memory_used": 3668.0, + "relative_time": 4.397106885910034 + }, + { + "timestamp": "2025-01-04T02:37:57.582176", + "cpu_percent": 11.01, + "ram_percent": 52.8, + "ram_used_gb": 33.564491271972656, + "gpu_memory_used": 3665.0, + "relative_time": 5.46670126914978 + }, + { + "timestamp": "2025-01-04T02:37:58.637969", + "cpu_percent": 15.04, + "ram_percent": 52.8, + "ram_used_gb": 33.555362701416016, + "gpu_memory_used": 3668.0, + "relative_time": 6.523184061050415 + }, + { + "timestamp": "2025-01-04T02:37:59.700880", + "cpu_percent": 13.32, + "ram_percent": 52.8, + "ram_used_gb": 33.559967041015625, + "gpu_memory_used": 3668.0, + "relative_time": 7.589032888412476 + }, + { + "timestamp": "2025-01-04T02:38:00.773895", + "cpu_percent": 12.45, + "ram_percent": 52.8, + "ram_used_gb": 33.609134674072266, + "gpu_memory_used": 3667.0, + "relative_time": 8.677486181259155 + }, + { + "timestamp": "2025-01-04T02:38:01.851195", + "cpu_percent": 12.62, + "ram_percent": 52.9, + "ram_used_gb": 33.67635726928711, + "gpu_memory_used": 3665.0, + "relative_time": 9.734971046447754 + }, + { + "timestamp": "2025-01-04T02:38:02.907897", + "cpu_percent": 20.61, + "ram_percent": 53.0, + "ram_used_gb": 33.72555160522461, + "gpu_memory_used": 3660.0, + "relative_time": 10.813292026519775 + }, + { + "timestamp": "2025-01-04T02:38:03.996322", + "cpu_percent": 33.24, + "ram_percent": 53.2, + "ram_used_gb": 33.832088470458984, + "gpu_memory_used": 3660.0, + "relative_time": 11.917856454849243 + }, + { + "timestamp": "2025-01-04T02:38:05.101973", + "cpu_percent": 14.24, + "ram_percent": 53.0, + "ram_used_gb": 33.7408447265625, + "gpu_memory_used": 3662.0, + "relative_time": 12.986546277999878 + }, + { + "timestamp": "2025-01-04T02:38:06.162037", + "cpu_percent": 14.38, + "ram_percent": 53.1, + "ram_used_gb": 33.774169921875, + "gpu_memory_used": 3662.0, + "relative_time": 14.062608242034912 + }, + { + "timestamp": "2025-01-04T02:38:07.248210", + "cpu_percent": 14.39, + "ram_percent": 53.2, + "ram_used_gb": 33.83738327026367, + "gpu_memory_used": 4029.0, + "relative_time": 15.156044960021973 + }, + { + "timestamp": "2025-01-04T02:38:08.329582", + "cpu_percent": 31.18, + "ram_percent": 53.2, + "ram_used_gb": 33.87126541137695, + "gpu_memory_used": 4032.0, + "relative_time": 16.249940395355225 + }, + { + "timestamp": "2025-01-04T02:38:09.432992", + "cpu_percent": 19.33, + "ram_percent": 53.2, + "ram_used_gb": 33.842403411865234, + "gpu_memory_used": 4032.0, + "relative_time": 17.331223011016846 + }, + { + "timestamp": "2025-01-04T02:38:10.505101", + "cpu_percent": 13.34, + "ram_percent": 53.2, + "ram_used_gb": 33.86738967895508, + "gpu_memory_used": 4029.0, + "relative_time": 18.390397548675537 + }, + { + "timestamp": "2025-01-04T02:38:11.570033", + "cpu_percent": 12.61, + "ram_percent": 53.4, + "ram_used_gb": 33.938289642333984, + "gpu_memory_used": 4028.0, + "relative_time": 19.477521181106567 + }, + { + "timestamp": "2025-01-04T02:38:12.663780", + "cpu_percent": 15.78, + "ram_percent": 53.4, + "ram_used_gb": 33.969398498535156, + "gpu_memory_used": 4030.0, + "relative_time": 20.57425808906555 + }, + { + "timestamp": "2025-01-04T02:38:13.750065", + "cpu_percent": 18.69, + "ram_percent": 53.5, + "ram_used_gb": 34.03954315185547, + "gpu_memory_used": 4021.0, + "relative_time": 21.652076244354248 + }, + { + "timestamp": "2025-01-04T02:38:14.825318", + "cpu_percent": 10.48, + "ram_percent": 53.6, + "ram_used_gb": 34.07048416137695, + "gpu_memory_used": 4025.0, + "relative_time": 22.73010230064392 + }, + { + "timestamp": "2025-01-04T02:38:15.912340", + "cpu_percent": 12.53, + "ram_percent": 53.6, + "ram_used_gb": 34.09389877319336, + "gpu_memory_used": 4026.0, + "relative_time": 23.81609869003296 + }, + { + "timestamp": "2025-01-04T02:38:17.003329", + "cpu_percent": 16.09, + "ram_percent": 53.7, + "ram_used_gb": 34.1781120300293, + "gpu_memory_used": 4025.0, + "relative_time": 24.90904140472412 + }, + { + "timestamp": "2025-01-04T02:38:18.079837", + "cpu_percent": 14.98, + "ram_percent": 53.8, + "ram_used_gb": 34.21260070800781, + "gpu_memory_used": 4025.0, + "relative_time": 25.986279249191284 + }, + { + "timestamp": "2025-01-04T02:38:19.167635", + "cpu_percent": 14.85, + "ram_percent": 53.8, + "ram_used_gb": 34.23923873901367, + "gpu_memory_used": 4024.0, + "relative_time": 27.076823234558105 + }, + { + "timestamp": "2025-01-04T02:38:20.258141", + "cpu_percent": 15.05, + "ram_percent": 53.9, + "ram_used_gb": 34.26483917236328, + "gpu_memory_used": 4015.0, + "relative_time": 28.144607067108154 + }, + { + "timestamp": "2025-01-04T02:38:21.315694", + "cpu_percent": 17.08, + "ram_percent": 53.9, + "ram_used_gb": 34.31473922729492, + "gpu_memory_used": 4016.0, + "relative_time": 29.20189356803894 + }, + { + "timestamp": "2025-01-04T02:38:22.388259", + "cpu_percent": 17.47, + "ram_percent": 54.0, + "ram_used_gb": 34.35490798950195, + "gpu_memory_used": 4016.0, + "relative_time": 30.28918957710266 + }, + { + "timestamp": "2025-01-04T02:38:23.463469", + "cpu_percent": 15.76, + "ram_percent": 54.0, + "ram_used_gb": 34.33717346191406, + "gpu_memory_used": 4002.0, + "relative_time": 31.364880561828613 + }, + { + "timestamp": "2025-01-04T02:38:24.540334", + "cpu_percent": 13.54, + "ram_percent": 54.1, + "ram_used_gb": 34.38197708129883, + "gpu_memory_used": 3999.0, + "relative_time": 32.4253191947937 + }, + { + "timestamp": "2025-01-04T02:38:25.597934", + "cpu_percent": 13.99, + "ram_percent": 54.2, + "ram_used_gb": 34.48365783691406, + "gpu_memory_used": 4004.0, + "relative_time": 33.50029754638672 + }, + { + "timestamp": "2025-01-04T02:38:26.673108", + "cpu_percent": 15.16, + "ram_percent": 54.2, + "ram_used_gb": 34.50083923339844, + "gpu_memory_used": 4011.0, + "relative_time": 34.5756139755249 + }, + { + "timestamp": "2025-01-04T02:38:27.748147", + "cpu_percent": 17.68, + "ram_percent": 54.2, + "ram_used_gb": 34.49884033203125, + "gpu_memory_used": 4016.0, + "relative_time": 35.650988817214966 + }, + { + "timestamp": "2025-01-04T02:38:28.835603", + "cpu_percent": 26.81, + "ram_percent": 54.3, + "ram_used_gb": 34.536773681640625, + "gpu_memory_used": 4015.0, + "relative_time": 36.73981595039368 + }, + { + "timestamp": "2025-01-04T02:38:29.912604", + "cpu_percent": 27.61, + "ram_percent": 54.3, + "ram_used_gb": 34.56916427612305, + "gpu_memory_used": 4016.0, + "relative_time": 37.81279993057251 + }, + { + "timestamp": "2025-01-04T02:38:30.984988", + "cpu_percent": 34.24, + "ram_percent": 54.4, + "ram_used_gb": 34.599365234375, + "gpu_memory_used": 4014.0, + "relative_time": 38.89973425865173 + }, + { + "timestamp": "2025-01-04T02:38:32.071596", + "cpu_percent": 31.95, + "ram_percent": 54.2, + "ram_used_gb": 34.46506881713867, + "gpu_memory_used": 4014.0, + "relative_time": 39.95902729034424 + }, + { + "timestamp": "2025-01-04T02:38:33.140836", + "cpu_percent": 27.78, + "ram_percent": 54.3, + "ram_used_gb": 34.51242446899414, + "gpu_memory_used": 4014.0, + "relative_time": 41.0454580783844 + }, + { + "timestamp": "2025-01-04T02:38:34.229919", + "cpu_percent": 21.09, + "ram_percent": 54.3, + "ram_used_gb": 34.513973236083984, + "gpu_memory_used": 4011.0, + "relative_time": 42.133435010910034 + }, + { + "timestamp": "2025-01-04T02:38:35.317486", + "cpu_percent": 17.26, + "ram_percent": 53.9, + "ram_used_gb": 34.3167839050293, + "gpu_memory_used": 4020.0, + "relative_time": 43.21739077568054 + }, + { + "timestamp": "2025-01-04T02:38:36.394375", + "cpu_percent": 12.32, + "ram_percent": 54.0, + "ram_used_gb": 34.34043884277344, + "gpu_memory_used": 4020.0, + "relative_time": 44.27889919281006 + }, + { + "timestamp": "2025-01-04T02:38:37.454005", + "cpu_percent": 12.46, + "ram_percent": 54.0, + "ram_used_gb": 34.37453842163086, + "gpu_memory_used": 4020.0, + "relative_time": 45.341508626937866 + }, + { + "timestamp": "2025-01-04T02:38:38.515337", + "cpu_percent": 14.16, + "ram_percent": 54.1, + "ram_used_gb": 34.401729583740234, + "gpu_memory_used": 4019.0, + "relative_time": 46.410696506500244 + }, + { + "timestamp": "2025-01-04T02:38:39.593044", + "cpu_percent": 13.71, + "ram_percent": 54.1, + "ram_used_gb": 34.435630798339844, + "gpu_memory_used": 4019.0, + "relative_time": 47.48556661605835 + }, + { + "timestamp": "2025-01-04T02:38:40.665509", + "cpu_percent": 13.17, + "ram_percent": 54.2, + "ram_used_gb": 34.49795150756836, + "gpu_memory_used": 4016.0, + "relative_time": 48.551952838897705 + }, + { + "timestamp": "2025-01-04T02:38:41.724929", + "cpu_percent": 12.67, + "ram_percent": 54.3, + "ram_used_gb": 34.52568054199219, + "gpu_memory_used": 4011.0, + "relative_time": 49.61591196060181 + }, + { + "timestamp": "2025-01-04T02:38:42.801080", + "cpu_percent": 12.83, + "ram_percent": 54.4, + "ram_used_gb": 34.579071044921875, + "gpu_memory_used": 4007.0, + "relative_time": 50.70357823371887 + }, + { + "timestamp": "2025-01-04T02:38:43.884984", + "cpu_percent": 12.31, + "ram_percent": 54.4, + "ram_used_gb": 34.59829330444336, + "gpu_memory_used": 4003.0, + "relative_time": 51.771891832351685 + }, + { + "timestamp": "2025-01-04T02:38:44.957477", + "cpu_percent": 12.58, + "ram_percent": 54.7, + "ram_used_gb": 34.76633071899414, + "gpu_memory_used": 4003.0, + "relative_time": 52.859192848205566 + }, + { + "timestamp": "2025-01-04T02:38:46.031581", + "cpu_percent": 14.48, + "ram_percent": 54.6, + "ram_used_gb": 34.76308059692383, + "gpu_memory_used": 4013.0, + "relative_time": 53.91648840904236 + }, + { + "timestamp": "2025-01-04T02:38:47.091693", + "cpu_percent": 14.35, + "ram_percent": 54.7, + "ram_used_gb": 34.81193923950195, + "gpu_memory_used": 4013.0, + "relative_time": 54.993882179260254 + }, + { + "timestamp": "2025-01-04T02:38:48.178826", + "cpu_percent": 16.46, + "ram_percent": 54.7, + "ram_used_gb": 34.784278869628906, + "gpu_memory_used": 4014.0, + "relative_time": 56.064146518707275 + }, + { + "timestamp": "2025-01-04T02:38:49.235997", + "cpu_percent": 12.84, + "ram_percent": 54.7, + "ram_used_gb": 34.79767608642578, + "gpu_memory_used": 4014.0, + "relative_time": 57.12374472618103 + }, + { + "timestamp": "2025-01-04T02:38:50.295962", + "cpu_percent": 15.69, + "ram_percent": 54.8, + "ram_used_gb": 34.8546257019043, + "gpu_memory_used": 4013.0, + "relative_time": 58.180296421051025 + }, + { + "timestamp": "2025-01-04T02:38:51.357678", + "cpu_percent": 14.54, + "ram_percent": 54.8, + "ram_used_gb": 34.8900260925293, + "gpu_memory_used": 4014.0, + "relative_time": 59.242270708084106 + }, + { + "timestamp": "2025-01-04T02:38:52.415380", + "cpu_percent": 14.74, + "ram_percent": 54.9, + "ram_used_gb": 34.92173767089844, + "gpu_memory_used": 4014.0, + "relative_time": 60.307114601135254 + }, + { + "timestamp": "2025-01-04T02:38:53.490598", + "cpu_percent": 13.82, + "ram_percent": 55.1, + "ram_used_gb": 35.028907775878906, + "gpu_memory_used": 4014.0, + "relative_time": 61.37576389312744 + }, + { + "timestamp": "2025-01-04T02:38:54.548660", + "cpu_percent": 11.31, + "ram_percent": 55.1, + "ram_used_gb": 35.05375289916992, + "gpu_memory_used": 4011.0, + "relative_time": 62.43392610549927 + }, + { + "timestamp": "2025-01-04T02:38:55.609900", + "cpu_percent": 14.35, + "ram_percent": 55.1, + "ram_used_gb": 35.03831100463867, + "gpu_memory_used": 4011.0, + "relative_time": 63.493370056152344 + }, + { + "timestamp": "2025-01-04T02:38:56.666032", + "cpu_percent": 13.11, + "ram_percent": 55.1, + "ram_used_gb": 35.07795333862305, + "gpu_memory_used": 4011.0, + "relative_time": 64.54955720901489 + }, + { + "timestamp": "2025-01-04T02:38:57.730782", + "cpu_percent": 16.01, + "ram_percent": 55.2, + "ram_used_gb": 35.11598587036133, + "gpu_memory_used": 4007.0, + "relative_time": 65.61445665359497 + }, + { + "timestamp": "2025-01-04T02:38:58.787051", + "cpu_percent": 13.68, + "ram_percent": 55.3, + "ram_used_gb": 35.15185546875, + "gpu_memory_used": 4007.0, + "relative_time": 66.67095923423767 + }, + { + "timestamp": "2025-01-04T02:38:59.847606", + "cpu_percent": 13.08, + "ram_percent": 55.3, + "ram_used_gb": 35.183753967285156, + "gpu_memory_used": 4003.0, + "relative_time": 67.73307466506958 + }, + { + "timestamp": "2025-01-04T02:39:00.918871", + "cpu_percent": 13.26, + "ram_percent": 55.4, + "ram_used_gb": 35.22275161743164, + "gpu_memory_used": 4005.0, + "relative_time": 68.80590057373047 + }, + { + "timestamp": "2025-01-04T02:39:01.991170", + "cpu_percent": 11.05, + "ram_percent": 55.3, + "ram_used_gb": 35.15507125854492, + "gpu_memory_used": 4005.0, + "relative_time": 69.8813705444336 + }, + { + "timestamp": "2025-01-04T02:39:03.058942", + "cpu_percent": 11.08, + "ram_percent": 55.4, + "ram_used_gb": 35.2095947265625, + "gpu_memory_used": 4005.0, + "relative_time": 70.94484400749207 + }, + { + "timestamp": "2025-01-04T02:39:04.116159", + "cpu_percent": 12.74, + "ram_percent": 55.4, + "ram_used_gb": 35.24392318725586, + "gpu_memory_used": 4005.0, + "relative_time": 72.00281810760498 + }, + { + "timestamp": "2025-01-04T02:39:05.187410", + "cpu_percent": 11.04, + "ram_percent": 55.4, + "ram_used_gb": 35.259830474853516, + "gpu_memory_used": 4005.0, + "relative_time": 73.07217526435852 + }, + { + "timestamp": "2025-01-04T02:39:06.248588", + "cpu_percent": 13.66, + "ram_percent": 55.5, + "ram_used_gb": 35.29854965209961, + "gpu_memory_used": 4005.0, + "relative_time": 74.13533973693848 + }, + { + "timestamp": "2025-01-04T02:39:07.321179", + "cpu_percent": 11.16, + "ram_percent": 55.6, + "ram_used_gb": 35.346981048583984, + "gpu_memory_used": 4005.0, + "relative_time": 75.18772435188293 + }, + { + "timestamp": "2025-01-04T02:39:08.362496", + "cpu_percent": 9.46, + "ram_percent": 56.1, + "ram_used_gb": 35.69393539428711, + "gpu_memory_used": 4006.0, + "relative_time": 76.25136637687683 + }, + { + "timestamp": "2025-01-04T02:39:09.438218", + "cpu_percent": 15.88, + "ram_percent": 56.1, + "ram_used_gb": 35.6658821105957, + "gpu_memory_used": 4006.0, + "relative_time": 77.32329249382019 + }, + { + "timestamp": "2025-01-04T02:39:10.495653", + "cpu_percent": 13.25, + "ram_percent": 56.1, + "ram_used_gb": 35.658119201660156, + "gpu_memory_used": 4014.0, + "relative_time": 78.38210940361023 + }, + { + "timestamp": "2025-01-04T02:39:11.560094", + "cpu_percent": 10.8, + "ram_percent": 56.1, + "ram_used_gb": 35.694610595703125, + "gpu_memory_used": 4014.0, + "relative_time": 79.44816374778748 + }, + { + "timestamp": "2025-01-04T02:39:12.621879", + "cpu_percent": 12.58, + "ram_percent": 56.1, + "ram_used_gb": 35.68545150756836, + "gpu_memory_used": 4014.0, + "relative_time": 80.51017951965332 + }, + { + "timestamp": "2025-01-04T02:39:13.692834", + "cpu_percent": 13.3, + "ram_percent": 56.2, + "ram_used_gb": 35.730979919433594, + "gpu_memory_used": 4012.0, + "relative_time": 81.57789969444275 + }, + { + "timestamp": "2025-01-04T02:39:14.753401", + "cpu_percent": 14.75, + "ram_percent": 56.2, + "ram_used_gb": 35.73103332519531, + "gpu_memory_used": 4012.0, + "relative_time": 82.63830900192261 + }, + { + "timestamp": "2025-01-04T02:39:15.811385", + "cpu_percent": 14.69, + "ram_percent": 56.2, + "ram_used_gb": 35.740108489990234, + "gpu_memory_used": 4011.0, + "relative_time": 83.69796371459961 + }, + { + "timestamp": "2025-01-04T02:39:16.874197", + "cpu_percent": 14.28, + "ram_percent": 56.2, + "ram_used_gb": 35.767982482910156, + "gpu_memory_used": 4010.0, + "relative_time": 84.76145887374878 + }, + { + "timestamp": "2025-01-04T02:39:17.936944", + "cpu_percent": 12.99, + "ram_percent": 56.3, + "ram_used_gb": 35.81233596801758, + "gpu_memory_used": 4010.0, + "relative_time": 85.84119439125061 + }, + { + "timestamp": "2025-01-04T02:39:19.027320", + "cpu_percent": 12.79, + "ram_percent": 56.6, + "ram_used_gb": 36.0085563659668, + "gpu_memory_used": 4010.0, + "relative_time": 86.91442775726318 + }, + { + "timestamp": "2025-01-04T02:39:20.088804", + "cpu_percent": 15.62, + "ram_percent": 56.9, + "ram_used_gb": 36.16616439819336, + "gpu_memory_used": 4006.0, + "relative_time": 88.00494360923767 + }, + { + "timestamp": "2025-01-04T02:39:21.191796", + "cpu_percent": 12.98, + "ram_percent": 57.1, + "ram_used_gb": 36.3217658996582, + "gpu_memory_used": 4005.0, + "relative_time": 89.12522411346436 + }, + { + "timestamp": "2025-01-04T02:39:22.311508", + "cpu_percent": 14.77, + "ram_percent": 56.9, + "ram_used_gb": 36.191429138183594, + "gpu_memory_used": 4005.0, + "relative_time": 90.19932198524475 + }, + { + "timestamp": "2025-01-04T02:39:23.372871", + "cpu_percent": 12.98, + "ram_percent": 57.1, + "ram_used_gb": 36.29658889770508, + "gpu_memory_used": 4005.0, + "relative_time": 91.3045928478241 + }, + { + "timestamp": "2025-01-04T02:39:24.479087", + "cpu_percent": 14.64, + "ram_percent": 57.1, + "ram_used_gb": 36.30413055419922, + "gpu_memory_used": 3998.0, + "relative_time": 92.36360597610474 + }, + { + "timestamp": "2025-01-04T02:39:25.537969", + "cpu_percent": 14.51, + "ram_percent": 57.1, + "ram_used_gb": 36.311763763427734, + "gpu_memory_used": 3998.0, + "relative_time": 93.42230415344238 + }, + { + "timestamp": "2025-01-04T02:39:26.594967", + "cpu_percent": 13.05, + "ram_percent": 57.1, + "ram_used_gb": 36.351402282714844, + "gpu_memory_used": 3998.0, + "relative_time": 94.47847175598145 + }, + { + "timestamp": "2025-01-04T02:39:27.652223", + "cpu_percent": 15.05, + "ram_percent": 57.2, + "ram_used_gb": 36.36949157714844, + "gpu_memory_used": 4004.0, + "relative_time": 95.53560948371887 + }, + { + "timestamp": "2025-01-04T02:39:28.708008", + "cpu_percent": 12.51, + "ram_percent": 57.2, + "ram_used_gb": 36.3841667175293, + "gpu_memory_used": 4004.0, + "relative_time": 96.59472155570984 + }, + { + "timestamp": "2025-01-04T02:39:29.768866", + "cpu_percent": 10.83, + "ram_percent": 57.2, + "ram_used_gb": 36.39939880371094, + "gpu_memory_used": 4004.0, + "relative_time": 97.6679356098175 + }, + { + "timestamp": "2025-01-04T02:39:30.844295", + "cpu_percent": 14.61, + "ram_percent": 57.3, + "ram_used_gb": 36.42519760131836, + "gpu_memory_used": 4004.0, + "relative_time": 98.74996089935303 + }, + { + "timestamp": "2025-01-04T02:39:31.934080", + "cpu_percent": 11.74, + "ram_percent": 57.0, + "ram_used_gb": 36.271087646484375, + "gpu_memory_used": 4004.0, + "relative_time": 99.81860518455505 + }, + { + "timestamp": "2025-01-04T02:39:32.989954", + "cpu_percent": 12.09, + "ram_percent": 57.2, + "ram_used_gb": 36.368350982666016, + "gpu_memory_used": 4010.0, + "relative_time": 100.87712931632996 + }, + { + "timestamp": "2025-01-04T02:39:34.061411", + "cpu_percent": 11.07, + "ram_percent": 57.2, + "ram_used_gb": 36.38072967529297, + "gpu_memory_used": 4010.0, + "relative_time": 101.946035861969 + }, + { + "timestamp": "2025-01-04T02:39:35.117182", + "cpu_percent": 9.32, + "ram_percent": 57.2, + "ram_used_gb": 36.367733001708984, + "gpu_memory_used": 4415.0, + "relative_time": 103.00355505943298 + }, + { + "timestamp": "2025-01-04T02:39:36.179256", + "cpu_percent": 12.93, + "ram_percent": 57.2, + "ram_used_gb": 36.396636962890625, + "gpu_memory_used": 4417.0, + "relative_time": 104.06347131729126 + }, + { + "timestamp": "2025-01-04T02:39:37.237454", + "cpu_percent": 10.94, + "ram_percent": 57.3, + "ram_used_gb": 36.429630279541016, + "gpu_memory_used": 4417.0, + "relative_time": 105.12580728530884 + }, + { + "timestamp": "2025-01-04T02:39:38.310321", + "cpu_percent": 12.86, + "ram_percent": 57.3, + "ram_used_gb": 36.44291305541992, + "gpu_memory_used": 4418.0, + "relative_time": 106.17753839492798 + }, + { + "timestamp": "2025-01-04T02:39:39.355358", + "cpu_percent": 10.82, + "ram_percent": 57.3, + "ram_used_gb": 36.46603012084961, + "gpu_memory_used": 4418.0, + "relative_time": 107.24251008033752 + }, + { + "timestamp": "2025-01-04T02:39:40.413524", + "cpu_percent": 14.64, + "ram_percent": 57.4, + "ram_used_gb": 36.507179260253906, + "gpu_memory_used": 4418.0, + "relative_time": 108.29774165153503 + }, + { + "timestamp": "2025-01-04T02:39:41.482368", + "cpu_percent": 10.03, + "ram_percent": 58.1, + "ram_used_gb": 36.93812942504883, + "gpu_memory_used": 4418.0, + "relative_time": 109.36836910247803 + }, + { + "timestamp": "2025-01-04T02:39:42.546204", + "cpu_percent": 12.63, + "ram_percent": 58.0, + "ram_used_gb": 36.87542724609375, + "gpu_memory_used": 4418.0, + "relative_time": 110.43055510520935 + }, + { + "timestamp": "2025-01-04T02:39:43.604666", + "cpu_percent": 14.14, + "ram_percent": 58.0, + "ram_used_gb": 36.875328063964844, + "gpu_memory_used": 4426.0, + "relative_time": 111.49229407310486 + }, + { + "timestamp": "2025-01-04T02:39:44.664973", + "cpu_percent": 10.64, + "ram_percent": 58.0, + "ram_used_gb": 36.88217544555664, + "gpu_memory_used": 4425.0, + "relative_time": 112.55481696128845 + }, + { + "timestamp": "2025-01-04T02:39:45.741305", + "cpu_percent": 13.92, + "ram_percent": 57.9, + "ram_used_gb": 36.85449981689453, + "gpu_memory_used": 4425.0, + "relative_time": 113.62504053115845 + }, + { + "timestamp": "2025-01-04T02:39:46.799656", + "cpu_percent": 13.15, + "ram_percent": 58.0, + "ram_used_gb": 36.870826721191406, + "gpu_memory_used": 4423.0, + "relative_time": 114.6845052242279 + }, + { + "timestamp": "2025-01-04T02:39:47.859725", + "cpu_percent": 13.85, + "ram_percent": 58.0, + "ram_used_gb": 36.870948791503906, + "gpu_memory_used": 4423.0, + "relative_time": 115.74664235115051 + }, + { + "timestamp": "2025-01-04T02:39:48.919071", + "cpu_percent": 14.59, + "ram_percent": 58.0, + "ram_used_gb": 36.886802673339844, + "gpu_memory_used": 4422.0, + "relative_time": 116.80267906188965 + }, + { + "timestamp": "2025-01-04T02:39:49.976541", + "cpu_percent": 16.56, + "ram_percent": 58.0, + "ram_used_gb": 36.90068435668945, + "gpu_memory_used": 4422.0, + "relative_time": 117.86520886421204 + }, + { + "timestamp": "2025-01-04T02:39:51.036593", + "cpu_percent": 10.33, + "ram_percent": 58.1, + "ram_used_gb": 36.96821212768555, + "gpu_memory_used": 4416.0, + "relative_time": 118.92232513427734 + }, + { + "timestamp": "2025-01-04T02:39:52.098186", + "cpu_percent": 15.23, + "ram_percent": 58.1, + "ram_used_gb": 36.96358108520508, + "gpu_memory_used": 4416.0, + "relative_time": 119.98378920555115 + }, + { + "timestamp": "2025-01-04T02:39:53.168566", + "cpu_percent": 11.96, + "ram_percent": 58.2, + "ram_used_gb": 37.00669479370117, + "gpu_memory_used": 4416.0, + "relative_time": 121.05223441123962 + }, + { + "timestamp": "2025-01-04T02:39:54.230292", + "cpu_percent": 13.03, + "ram_percent": 58.2, + "ram_used_gb": 37.031307220458984, + "gpu_memory_used": 4416.0, + "relative_time": 122.11563086509705 + }, + { + "timestamp": "2025-01-04T02:39:55.287670", + "cpu_percent": 14.93, + "ram_percent": 58.2, + "ram_used_gb": 37.045589447021484, + "gpu_memory_used": 4416.0, + "relative_time": 123.17237305641174 + }, + { + "timestamp": "2025-01-04T02:39:56.349816", + "cpu_percent": 12.91, + "ram_percent": 58.3, + "ram_used_gb": 37.072689056396484, + "gpu_memory_used": 4416.0, + "relative_time": 124.23265671730042 + }, + { + "timestamp": "2025-01-04T02:39:57.409384", + "cpu_percent": 11.35, + "ram_percent": 58.3, + "ram_used_gb": 37.10455322265625, + "gpu_memory_used": 4416.0, + "relative_time": 125.29221749305725 + }, + { + "timestamp": "2025-01-04T02:39:58.464653", + "cpu_percent": 12.97, + "ram_percent": 58.4, + "ram_used_gb": 37.12955093383789, + "gpu_memory_used": 4416.0, + "relative_time": 126.34849739074707 + }, + { + "timestamp": "2025-01-04T02:39:59.521737", + "cpu_percent": 11.69, + "ram_percent": 58.4, + "ram_used_gb": 37.139190673828125, + "gpu_memory_used": 4416.0, + "relative_time": 127.40602111816406 + }, + { + "timestamp": "2025-01-04T02:40:00.581455", + "cpu_percent": 12.86, + "ram_percent": 58.5, + "ram_used_gb": 37.204673767089844, + "gpu_memory_used": 4418.0, + "relative_time": 128.5798671245575 + }, + { + "timestamp": "2025-01-04T02:40:01.760893", + "cpu_percent": 81.59, + "ram_percent": 58.1, + "ram_used_gb": 36.97315216064453, + "gpu_memory_used": 4425.0, + "relative_time": 129.6643455028534 + }, + { + "timestamp": "2025-01-04T02:40:02.850000", + "cpu_percent": 14.55, + "ram_percent": 58.2, + "ram_used_gb": 37.0354118347168, + "gpu_memory_used": 4435.0, + "relative_time": 130.7529788017273 + }, + { + "timestamp": "2025-01-04T02:40:03.934586", + "cpu_percent": 13.16, + "ram_percent": 58.2, + "ram_used_gb": 37.03787612915039, + "gpu_memory_used": 4437.0, + "relative_time": 131.81812405586243 + }, + { + "timestamp": "2025-01-04T02:40:04.989915", + "cpu_percent": 11.0, + "ram_percent": 58.3, + "ram_used_gb": 37.09538650512695, + "gpu_memory_used": 4437.0, + "relative_time": 132.88244915008545 + }, + { + "timestamp": "2025-01-04T02:40:06.067559", + "cpu_percent": 32.79, + "ram_percent": 58.3, + "ram_used_gb": 37.07184982299805, + "gpu_memory_used": 4437.0, + "relative_time": 133.97513842582703 + }, + { + "timestamp": "2025-01-04T02:40:07.156881", + "cpu_percent": 22.34, + "ram_percent": 58.3, + "ram_used_gb": 37.07517623901367, + "gpu_memory_used": 4438.0, + "relative_time": 135.04176831245422 + }, + { + "timestamp": "2025-01-04T02:40:08.212791", + "cpu_percent": 14.86, + "ram_percent": 58.3, + "ram_used_gb": 37.08013153076172, + "gpu_memory_used": 4438.0, + "relative_time": 136.1427457332611 + }, + { + "timestamp": "2025-01-04T02:40:09.317884", + "cpu_percent": 21.55, + "ram_percent": 58.4, + "ram_used_gb": 37.144142150878906, + "gpu_memory_used": 4447.0, + "relative_time": 137.20455360412598 + }, + { + "timestamp": "2025-01-04T02:40:10.390292", + "cpu_percent": 26.97, + "ram_percent": 58.4, + "ram_used_gb": 37.141868591308594, + "gpu_memory_used": 4454.0, + "relative_time": 138.2930736541748 + }, + { + "timestamp": "2025-01-04T02:40:11.464548", + "cpu_percent": 12.21, + "ram_percent": 58.5, + "ram_used_gb": 37.205867767333984, + "gpu_memory_used": 4451.0, + "relative_time": 139.35198616981506 + }, + { + "timestamp": "2025-01-04T02:40:12.537687", + "cpu_percent": 13.14, + "ram_percent": 58.5, + "ram_used_gb": 37.233299255371094, + "gpu_memory_used": 4452.0, + "relative_time": 140.4236707687378 + }, + { + "timestamp": "2025-01-04T02:40:13.608158", + "cpu_percent": 13.93, + "ram_percent": 58.6, + "ram_used_gb": 37.25624465942383, + "gpu_memory_used": 4452.0, + "relative_time": 141.4932518005371 + }, + { + "timestamp": "2025-01-04T02:40:14.668199", + "cpu_percent": 15.76, + "ram_percent": 58.6, + "ram_used_gb": 37.278499603271484, + "gpu_memory_used": 4452.0, + "relative_time": 142.57055759429932 + }, + { + "timestamp": "2025-01-04T02:40:15.754506", + "cpu_percent": 14.59, + "ram_percent": 58.5, + "ram_used_gb": 37.21017837524414, + "gpu_memory_used": 4451.0, + "relative_time": 143.64187097549438 + }, + { + "timestamp": "2025-01-04T02:40:16.827392", + "cpu_percent": 17.55, + "ram_percent": 58.4, + "ram_used_gb": 37.1205940246582, + "gpu_memory_used": 4450.0, + "relative_time": 144.75147438049316 + }, + { + "timestamp": "2025-01-04T02:40:17.929858", + "cpu_percent": 55.09, + "ram_percent": 58.2, + "ram_used_gb": 37.04216766357422, + "gpu_memory_used": 4449.0, + "relative_time": 145.815936088562 + }, + { + "timestamp": "2025-01-04T02:40:18.988009", + "cpu_percent": 13.92, + "ram_percent": 58.0, + "ram_used_gb": 36.90068054199219, + "gpu_memory_used": 4447.0, + "relative_time": 146.8880341053009 + }, + { + "timestamp": "2025-01-04T02:40:20.062567", + "cpu_percent": 17.42, + "ram_percent": 57.7, + "ram_used_gb": 36.69455337524414, + "gpu_memory_used": 4446.0, + "relative_time": 147.96440315246582 + }, + { + "timestamp": "2025-01-04T02:40:21.149129", + "cpu_percent": 14.78, + "ram_percent": 57.4, + "ram_used_gb": 36.50687789916992, + "gpu_memory_used": 4445.0, + "relative_time": 149.041100025177 + }, + { + "timestamp": "2025-01-04T02:40:22.221780", + "cpu_percent": 11.1, + "ram_percent": 57.0, + "ram_used_gb": 36.28267288208008, + "gpu_memory_used": 4438.0, + "relative_time": 150.125506401062 + }, + { + "timestamp": "2025-01-04T02:40:23.308492", + "cpu_percent": 12.21, + "ram_percent": 56.7, + "ram_used_gb": 36.036773681640625, + "gpu_memory_used": 4436.0, + "relative_time": 151.19524502754211 + }, + { + "timestamp": "2025-01-04T02:40:24.381177", + "cpu_percent": 13.79, + "ram_percent": 56.3, + "ram_used_gb": 35.83684539794922, + "gpu_memory_used": 4436.0, + "relative_time": 152.26534175872803 + }, + { + "timestamp": "2025-01-04T02:40:25.452457", + "cpu_percent": 12.28, + "ram_percent": 56.4, + "ram_used_gb": 35.848087310791016, + "gpu_memory_used": 4436.0, + "relative_time": 153.33880996704102 + }, + { + "timestamp": "2025-01-04T02:40:26.521613", + "cpu_percent": 12.52, + "ram_percent": 56.8, + "ram_used_gb": 36.1606330871582, + "gpu_memory_used": 4440.0, + "relative_time": 154.40920901298523 + }, + { + "timestamp": "2025-01-04T02:40:27.587547", + "cpu_percent": 7.94, + "ram_percent": 57.2, + "ram_used_gb": 36.37208557128906, + "gpu_memory_used": 4440.0, + "relative_time": 155.46942234039307 + }, + { + "timestamp": "2025-01-04T02:40:28.647400", + "cpu_percent": 8.85, + "ram_percent": 57.3, + "ram_used_gb": 36.470054626464844, + "gpu_memory_used": 4440.0, + "relative_time": 156.53129720687866 + } + ], + "test_duration": 159.19756031036377 +} \ No newline at end of file diff --git a/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt new file mode 100644 index 0000000..cb2df6a --- /dev/null +++ b/examples/assorted_checks/benchmarks/output_data/gpu_benchmark_stats_rtf.txt @@ -0,0 +1,23 @@ +=== Benchmark Statistics (with correct RTF) === + +Total tokens processed: 17150 +Total audio generated (s): 5296.38 +Total test duration (s): 155.23 +Average processing rate (tokens/s): 102.86 +Average RTF: 0.03 +Average Real Time Speed: 31.25 + +=== Per-chunk Stats === + +Average chunk size (tokens): 1715.00 +Min chunk size (tokens): 150 +Max chunk size (tokens): 5000 +Average processing time (s): 15.39 +Average output length (s): 529.64 + +=== Performance Ranges === + +Processing rate range (tokens/s): 80.65 - 125.10 +RTF range: 0.03x - 0.04x +Real Time Speed range: 25.00x - 33.33x + diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png new file mode 100644 index 0000000..339c896 Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/cpu_processing_time_rtf.png differ diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png new file mode 100644 index 0000000..3e5c8d1 Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/cpu_realtime_factor_rtf.png differ diff --git a/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png b/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png new file mode 100644 index 0000000..e209978 Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/cpu_system_usage_rtf.png differ diff --git a/examples/benchmarks/format_comparison.png b/examples/assorted_checks/benchmarks/output_plots/format_comparison.png similarity index 100% rename from examples/benchmarks/format_comparison.png rename to examples/assorted_checks/benchmarks/output_plots/format_comparison.png diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png new file mode 100644 index 0000000..62c6864 Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/gpu_processing_time_rtf.png differ diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png new file mode 100644 index 0000000..1c5d7b7 Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/gpu_realtime_factor_rtf.png differ diff --git a/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png b/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png new file mode 100644 index 0000000..942b3a8 Binary files /dev/null and b/examples/assorted_checks/benchmarks/output_plots/gpu_system_usage_rtf.png differ diff --git a/examples/benchmarks/gpu_usage.png b/examples/assorted_checks/benchmarks/output_plots/gpu_usage.png similarity index 100% rename from examples/benchmarks/gpu_usage.png rename to examples/assorted_checks/benchmarks/output_plots/gpu_usage.png diff --git a/examples/benchmarks/the_time_machine_hg_wells.txt b/examples/assorted_checks/benchmarks/the_time_machine_hg_wells.txt similarity index 100% rename from examples/benchmarks/the_time_machine_hg_wells.txt rename to examples/assorted_checks/benchmarks/the_time_machine_hg_wells.txt diff --git a/examples/test_analyze_combined_voices.py b/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py similarity index 99% rename from examples/test_analyze_combined_voices.py rename to examples/assorted_checks/test_combinations/test_analyze_combined_voices.py index 8db7865..ec280e2 100644 --- a/examples/test_analyze_combined_voices.py +++ b/examples/assorted_checks/test_combinations/test_analyze_combined_voices.py @@ -332,8 +332,8 @@ def main(): ) parser.add_argument("--url", default="http://localhost:8880", help="API base URL") parser.add_argument( - "--output-dir", - default="examples/output", + "--output-dir", + default="examples/assorted_checks/test_combinations/output", help="Output directory for audio files", ) args = parser.parse_args() diff --git a/examples/test_audio_formats.py b/examples/assorted_checks/test_formats/test_audio_formats.py similarity index 100% rename from examples/test_audio_formats.py rename to examples/assorted_checks/test_formats/test_audio_formats.py diff --git a/examples/test_openai_tts.py b/examples/assorted_checks/test_openai/test_openai_tts.py similarity index 100% rename from examples/test_openai_tts.py rename to examples/assorted_checks/test_openai/test_openai_tts.py diff --git a/examples/test_all_voices.py b/examples/assorted_checks/test_voices/test_all_voices.py similarity index 100% rename from examples/test_all_voices.py rename to examples/assorted_checks/test_voices/test_all_voices.py diff --git a/examples/assorted_checks/validate_wav.py b/examples/assorted_checks/validate_wav.py new file mode 100644 index 0000000..d925e56 --- /dev/null +++ b/examples/assorted_checks/validate_wav.py @@ -0,0 +1,231 @@ +import numpy as np +import soundfile as sf +import argparse +from pathlib import Path + +def validate_tts(wav_path: str) -> dict: + """ + Quick validation checks for TTS-generated audio files to detect common artifacts. + + Checks for: + - Unnatural silence gaps + - Audio glitches and artifacts + - Repeated speech segments (stuck/looping) + - Abrupt changes in speech + - Audio quality issues + + Args: + wav_path: Path to audio file (wav, mp3, etc) + Returns: + Dictionary with validation results + """ + try: + # Load audio + audio, sr = sf.read(wav_path) + if len(audio.shape) > 1: + audio = audio.mean(axis=1) # Convert to mono + + # Basic audio stats + duration = len(audio) / sr + rms = np.sqrt(np.mean(audio**2)) + peak = np.max(np.abs(audio)) + dc_offset = np.mean(audio) + + # Calculate clipping stats if we're near peak + clip_count = np.sum(np.abs(audio) >= 0.99) + clip_percent = (clip_count / len(audio)) * 100 + if clip_percent > 0: + clip_stats = f" ({clip_percent:.2e} ratio near peak)" + else: + clip_stats = " (no samples near peak)" + + # Convert to dB for analysis + eps = np.finfo(float).eps + db = 20 * np.log10(np.abs(audio) + eps) + + issues = [] + + # Check if audio is too short (likely failed generation) + if duration < 0.1: # Less than 100ms + issues.append("WARNING: Audio is suspiciously short - possible failed generation") + + # 1. Check for basic audio quality + if peak >= 1.0: + # Calculate percentage of samples that are clipping + clip_count = np.sum(np.abs(audio) >= 0.99) + clip_percent = (clip_count / len(audio)) * 100 + + if clip_percent > 1.0: # Only warn if more than 1% of samples clip + issues.append(f"WARNING: Significant clipping detected ({clip_percent:.2e}% of samples)") + elif clip_percent > 0.01: # Add info if more than 0.01% but less than 1% + issues.append(f"INFO: Minor peak limiting detected ({clip_percent:.2e}% of samples) - likely intentional normalization") + + if rms < 0.01: + issues.append("WARNING: Audio is very quiet - possible failed generation") + if abs(dc_offset) > 0.1: # DC offset is particularly bad for speech + issues.append(f"WARNING: High DC offset ({dc_offset:.3f}) - may cause audio artifacts") + + # 2. Check for long silence gaps (potential TTS failures) + silence_threshold = -45 # dB + min_silence = 2.0 # Only detect silences longer than 2 seconds + window_size = int(min_silence * sr) + silence_count = 0 + last_silence = -1 + + # Skip the first 0.2s for silence detection (avoid false positives at start) + start_idx = int(0.2 * sr) + for i in range(start_idx, len(db) - window_size, window_size): + window = db[i:i+window_size] + if np.mean(window) < silence_threshold: + # Verify the entire window is mostly silence + silent_ratio = np.mean(window < silence_threshold) + if silent_ratio > 0.9: # 90% of the window should be below threshold + if last_silence == -1 or (i/sr - last_silence) > 2.0: # Only count silences more than 2s apart + silence_count += 1 + last_silence = i/sr + issues.append(f"WARNING: Long silence detected at {i/sr:.2f}s (duration: {min_silence:.1f}s)") + + if silence_count > 2: # Only warn if there are multiple long silences + issues.append(f"WARNING: Multiple long silences found ({silence_count} total) - possible generation issue") + + # 3. Check for extreme audio artifacts (changes too rapid for natural speech) + # Use a longer window to avoid flagging normal phoneme transitions + window_size = int(0.02 * sr) # 20ms window + db_smooth = np.convolve(db, np.ones(window_size)/window_size, 'same') + db_diff = np.abs(np.diff(db_smooth)) + + # Much higher threshold to only catch truly unnatural changes + artifact_threshold = 40 # dB + min_duration = int(0.01 * sr) # Minimum 10ms duration + + # Find regions where the smoothed dB change is extreme + artifact_points = np.where(db_diff > artifact_threshold)[0] + + if len(artifact_points) > 0: + # Group artifacts that are very close together + grouped_artifacts = [] + current_group = [artifact_points[0]] + + for i in range(1, len(artifact_points)): + if (artifact_points[i] - current_group[-1]) < min_duration: + current_group.append(artifact_points[i]) + else: + if len(current_group) * (1/sr) >= 0.01: # Only keep groups lasting >= 10ms + grouped_artifacts.append(current_group) + current_group = [artifact_points[i]] + + if len(current_group) * (1/sr) >= 0.01: + grouped_artifacts.append(current_group) + + # Report only the most severe artifacts + for group in grouped_artifacts[:2]: # Report up to 2 worst artifacts + center_idx = group[len(group)//2] + db_change = db_diff[center_idx] + if db_change > 45: # Only report very extreme changes + issues.append( + f"WARNING: Possible audio artifact at {center_idx/sr:.2f}s " + f"({db_change:.1f}dB change over {len(group)/sr*1000:.0f}ms)" + ) + + # 4. Check for repeated speech segments (stuck/looping) + # Check both short and long sentence durations at audiobook speed (150-160 wpm) + for chunk_duration in [5.0, 10.0]: # 5s (~12 words) and 10s (~25 words) at ~audiobook speed + chunk_size = int(chunk_duration * sr) + overlap = int(0.2 * chunk_size) # 20% overlap between chunks + + for i in range(0, len(audio) - 2*chunk_size, overlap): + chunk1 = audio[i:i+chunk_size] + chunk2 = audio[i+chunk_size:i+2*chunk_size] + + # Ignore chunks that are mostly silence + if np.mean(np.abs(chunk1)) < 0.01 or np.mean(np.abs(chunk2)) < 0.01: + continue + + try: + correlation = np.corrcoef(chunk1, chunk2)[0,1] + if not np.isnan(correlation) and correlation > 0.92: # Lower threshold for sentence-length chunks + issues.append( + f"WARNING: Possible repeated speech at {i/sr:.1f}s " + f"(~{int(chunk_duration*160/60):d} words, correlation: {correlation:.3f})" + ) + break # Found repetition at this duration, try next duration + except: + continue + + # 5. Check for extreme amplitude discontinuities (common in failed TTS) + amplitude_envelope = np.abs(audio) + window_size = sr // 10 # 100ms window for smoother envelope + smooth_env = np.convolve(amplitude_envelope, np.ones(window_size)/float(window_size), 'same') + env_diff = np.abs(np.diff(smooth_env)) + + # Only detect very extreme amplitude changes + jump_threshold = 0.5 # Much higher threshold + jumps = np.where(env_diff > jump_threshold)[0] + + if len(jumps) > 0: + # Group jumps that are close together + grouped_jumps = [] + current_group = [jumps[0]] + + for i in range(1, len(jumps)): + if (jumps[i] - current_group[-1]) < 0.05 * sr: # Group within 50ms + current_group.append(jumps[i]) + else: + if len(current_group) >= 3: # Only keep significant discontinuities + grouped_jumps.append(current_group) + current_group = [jumps[i]] + + if len(current_group) >= 3: + grouped_jumps.append(current_group) + + # Report only the most severe discontinuities + for group in grouped_jumps[:2]: # Report up to 2 worst cases + center_idx = group[len(group)//2] + jump_size = env_diff[center_idx] + if jump_size > 0.6: # Only report very extreme changes + issues.append( + f"WARNING: Possible audio discontinuity at {center_idx/sr:.2f}s " + f"({jump_size:.2f} amplitude ratio change)" + ) + + return { + "file": wav_path, + "duration": f"{duration:.2f}s", + "sample_rate": sr, + "peak_amplitude": f"{peak:.3f}{clip_stats}", + "rms_level": f"{rms:.3f}", + "dc_offset": f"{dc_offset:.3f}", + "issues": issues, + "valid": len(issues) == 0 + } + + except Exception as e: + return { + "file": wav_path, + "error": str(e), + "valid": False + } + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="TTS Output Validator") + parser.add_argument("wav_file", help="Path to audio file to validate") + args = parser.parse_args() + + result = validate_tts(args.wav_file) + + print(f"\nValidating: {result['file']}") + if "error" in result: + print(f"Error: {result['error']}") + else: + print(f"Duration: {result['duration']}") + print(f"Sample Rate: {result['sample_rate']} Hz") + print(f"Peak Amplitude: {result['peak_amplitude']}") + print(f"RMS Level: {result['rms_level']}") + print(f"DC Offset: {result['dc_offset']}") + + if result["issues"]: + print("\nIssues Found:") + for issue in result["issues"]: + print(f"- {issue}") + else: + print("\nNo issues found") diff --git a/examples/assorted_checks/validate_wavs.py b/examples/assorted_checks/validate_wavs.py new file mode 100644 index 0000000..a37c043 --- /dev/null +++ b/examples/assorted_checks/validate_wavs.py @@ -0,0 +1,72 @@ +import argparse +from pathlib import Path +from validate_wav import validate_tts + +def print_validation_result(result: dict, rel_path: Path): + """Print full validation details for a single file.""" + print(f"\nValidating: {rel_path}") + if "error" in result: + print(f"Error: {result['error']}") + else: + print(f"Duration: {result['duration']}") + print(f"Sample Rate: {result['sample_rate']} Hz") + print(f"Peak Amplitude: {result['peak_amplitude']}") + print(f"RMS Level: {result['rms_level']}") + print(f"DC Offset: {result['dc_offset']}") + + if result["issues"]: + print("\nIssues Found:") + for issue in result["issues"]: + print(f"- {issue}") + else: + print("\nNo issues found") + +def validate_directory(directory: str): + """Validate all wav files in a directory with detailed output and summary.""" + dir_path = Path(directory) + + # Find all wav files (including nested directories) + wav_files = list(dir_path.rglob("*.wav")) + wav_files.extend(dir_path.rglob("*.mp3")) # Also check mp3s + wav_files = sorted(wav_files) + + if not wav_files: + print(f"No .wav or .mp3 files found in {directory}") + return + + print(f"Found {len(wav_files)} files in {directory}") + print("=" * 80) + + # Store results for summary + results = [] + + # Detailed validation output + for wav_file in wav_files: + result = validate_tts(str(wav_file)) + rel_path = wav_file.relative_to(dir_path) + print_validation_result(result, rel_path) + results.append((rel_path, result)) + print("=" * 80) + + # Summary with detailed issues + print("\nSUMMARY:") + for rel_path, result in results: + if "error" in result: + print(f"{rel_path}: ERROR - {result['error']}") + elif result["issues"]: + # Show first issue in summary, indicate if there are more + issues = result["issues"] + first_issue = issues[0].replace("WARNING: ", "") + if len(issues) > 1: + print(f"{rel_path}: FAIL - {first_issue} (+{len(issues)-1} more issues)") + else: + print(f"{rel_path}: FAIL - {first_issue}") + else: + print(f"{rel_path}: PASS") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Batch validate TTS wav files") + parser.add_argument("directory", help="Directory containing wav files to validate") + args = parser.parse_args() + + validate_directory(args.directory) diff --git a/examples/benchmarks/analysis_comparison.png b/examples/benchmarks/analysis_comparison.png deleted file mode 100644 index 87a6d13..0000000 Binary files a/examples/benchmarks/analysis_comparison.png and /dev/null differ diff --git a/examples/benchmarks/benchmark_results.json b/examples/benchmarks/benchmark_results.json deleted file mode 100644 index 373cebe..0000000 --- a/examples/benchmarks/benchmark_results.json +++ /dev/null @@ -1,531 +0,0 @@ -{ - "results": [ - { - "tokens": 100, - "processing_time": 8.54442310333252, - "output_length": 31.15, - "realtime_factor": 3.6456527987068887, - "elapsed_time": 8.720048666000366 - }, - { - "tokens": 200, - "processing_time": 1.3838517665863037, - "output_length": 62.6, - "realtime_factor": 45.236058883981606, - "elapsed_time": 10.258155345916748 - }, - { - "tokens": 300, - "processing_time": 2.2024788856506348, - "output_length": 96.325, - "realtime_factor": 43.73481200095347, - "elapsed_time": 12.594647407531738 - }, - { - "tokens": 400, - "processing_time": 3.175424098968506, - "output_length": 128.55, - "realtime_factor": 40.48278150995886, - "elapsed_time": 16.005898475646973 - }, - { - "tokens": 500, - "processing_time": 3.205301523208618, - "output_length": 158.55, - "realtime_factor": 49.46492517224587, - "elapsed_time": 19.377076625823975 - }, - { - "tokens": 600, - "processing_time": 3.9976348876953125, - "output_length": 189.225, - "realtime_factor": 47.33423769700254, - "elapsed_time": 23.568575859069824 - }, - { - "tokens": 700, - "processing_time": 4.98036003112793, - "output_length": 222.05, - "realtime_factor": 44.58513011351734, - "elapsed_time": 28.767319917678833 - }, - { - "tokens": 800, - "processing_time": 5.156893491744995, - "output_length": 253.825, - "realtime_factor": 49.22052402406907, - "elapsed_time": 34.1369092464447 - }, - { - "tokens": 900, - "processing_time": 5.8110880851745605, - "output_length": 283.75, - "realtime_factor": 48.82906537312906, - "elapsed_time": 40.16419458389282 - }, - { - "tokens": 1000, - "processing_time": 6.686216354370117, - "output_length": 315.45, - "realtime_factor": 47.17914935460046, - "elapsed_time": 47.11375427246094 - }, - { - "tokens": 2000, - "processing_time": 13.290695905685425, - "output_length": 624.925, - "realtime_factor": 47.01973504131358, - "elapsed_time": 60.842002630233765 - }, - { - "tokens": 3000, - "processing_time": 20.058005571365356, - "output_length": 932.05, - "realtime_factor": 46.46773063671828, - "elapsed_time": 81.50969815254211 - }, - { - "tokens": 4000, - "processing_time": 26.38338828086853, - "output_length": 1222.975, - "realtime_factor": 46.353978002394015, - "elapsed_time": 108.76348638534546 - }, - { - "tokens": 5000, - "processing_time": 32.472310066223145, - "output_length": 1525.15, - "realtime_factor": 46.967708699801484, - "elapsed_time": 142.2994668483734 - }, - { - "tokens": 6000, - "processing_time": 42.67592263221741, - "output_length": 1837.525, - "realtime_factor": 43.0576514030137, - "elapsed_time": 186.26759266853333 - }, - { - "tokens": 7000, - "processing_time": 51.601537466049194, - "output_length": 2146.875, - "realtime_factor": 41.60486499869347, - "elapsed_time": 239.59922289848328 - }, - { - "tokens": 8000, - "processing_time": 51.86434292793274, - "output_length": 2458.425, - "realtime_factor": 47.401063258741466, - "elapsed_time": 293.4462616443634 - }, - { - "tokens": 9000, - "processing_time": 60.4497971534729, - "output_length": 2772.1, - "realtime_factor": 45.857887545297416, - "elapsed_time": 356.02399826049805 - }, - { - "tokens": 10000, - "processing_time": 71.75962543487549, - "output_length": 3085.625, - "realtime_factor": 42.99945800024164, - "elapsed_time": 430.50863671302795 - }, - { - "tokens": 11000, - "processing_time": 96.66409230232239, - "output_length": 3389.3, - "realtime_factor": 35.062657904030935, - "elapsed_time": 529.3296246528625 - }, - { - "tokens": 12000, - "processing_time": 85.70126295089722, - "output_length": 3703.175, - "realtime_factor": 43.21027336693678, - "elapsed_time": 618.0248212814331 - }, - { - "tokens": 13000, - "processing_time": 97.2874686717987, - "output_length": 4030.825, - "realtime_factor": 41.43210893479068, - "elapsed_time": 717.9070522785187 - }, - { - "tokens": 14000, - "processing_time": 105.1045708656311, - "output_length": 4356.775, - "realtime_factor": 41.451812838566596, - "elapsed_time": 826.1140224933624 - }, - { - "tokens": 15000, - "processing_time": 111.0716404914856, - "output_length": 4663.325, - "realtime_factor": 41.984839508672565, - "elapsed_time": 940.0645899772644 - }, - { - "tokens": 16000, - "processing_time": 116.61742973327637, - "output_length": 4978.65, - "realtime_factor": 42.692160266154104, - "elapsed_time": 1061.1957621574402 - } - ], - "system_metrics": [ - { - "timestamp": "2024-12-31T03:12:36.009478", - "cpu_percent": 8.1, - "ram_percent": 66.8, - "ram_used_gb": 42.47850799560547, - "gpu_memory_used": 2124.0 - }, - { - "timestamp": "2024-12-31T03:12:44.639678", - "cpu_percent": 7.7, - "ram_percent": 69.1, - "ram_used_gb": 43.984352111816406, - "gpu_memory_used": 3486.0 - }, - { - "timestamp": "2024-12-31T03:12:44.731107", - "cpu_percent": 8.3, - "ram_percent": 69.1, - "ram_used_gb": 43.97468948364258, - "gpu_memory_used": 3484.0 - }, - { - "timestamp": "2024-12-31T03:12:46.189723", - "cpu_percent": 14.2, - "ram_percent": 69.1, - "ram_used_gb": 43.98275375366211, - "gpu_memory_used": 3697.0 - }, - { - "timestamp": "2024-12-31T03:12:46.265437", - "cpu_percent": 4.7, - "ram_percent": 69.1, - "ram_used_gb": 43.982975006103516, - "gpu_memory_used": 3697.0 - }, - { - "timestamp": "2024-12-31T03:12:48.536216", - "cpu_percent": 12.5, - "ram_percent": 69.0, - "ram_used_gb": 43.86142349243164, - "gpu_memory_used": 3697.0 - }, - { - "timestamp": "2024-12-31T03:12:48.603827", - "cpu_percent": 6.2, - "ram_percent": 69.0, - "ram_used_gb": 43.8692626953125, - "gpu_memory_used": 3694.0 - }, - { - "timestamp": "2024-12-31T03:12:51.905764", - "cpu_percent": 14.2, - "ram_percent": 69.1, - "ram_used_gb": 43.93961715698242, - "gpu_memory_used": 3690.0 - }, - { - "timestamp": "2024-12-31T03:12:52.028178", - "cpu_percent": 26.0, - "ram_percent": 69.1, - "ram_used_gb": 43.944759368896484, - "gpu_memory_used": 3690.0 - }, - { - "timestamp": "2024-12-31T03:12:55.320709", - "cpu_percent": 13.2, - "ram_percent": 69.1, - "ram_used_gb": 43.943058013916016, - "gpu_memory_used": 3685.0 - }, - { - "timestamp": "2024-12-31T03:12:55.386582", - "cpu_percent": 3.2, - "ram_percent": 69.1, - "ram_used_gb": 43.9305419921875, - "gpu_memory_used": 3685.0 - }, - { - "timestamp": "2024-12-31T03:12:59.492304", - "cpu_percent": 15.6, - "ram_percent": 69.1, - "ram_used_gb": 43.964195251464844, - "gpu_memory_used": 4053.0 - }, - { - "timestamp": "2024-12-31T03:12:59.586143", - "cpu_percent": 2.1, - "ram_percent": 69.1, - "ram_used_gb": 43.9642448425293, - "gpu_memory_used": 4053.0 - }, - { - "timestamp": "2024-12-31T03:13:04.705286", - "cpu_percent": 12.0, - "ram_percent": 69.2, - "ram_used_gb": 43.992374420166016, - "gpu_memory_used": 4059.0 - }, - { - "timestamp": "2024-12-31T03:13:04.779475", - "cpu_percent": 4.7, - "ram_percent": 69.2, - "ram_used_gb": 43.9922981262207, - "gpu_memory_used": 4059.0 - }, - { - "timestamp": "2024-12-31T03:13:10.063292", - "cpu_percent": 12.4, - "ram_percent": 69.2, - "ram_used_gb": 44.004146575927734, - "gpu_memory_used": 4041.0 - }, - { - "timestamp": "2024-12-31T03:13:10.155395", - "cpu_percent": 6.8, - "ram_percent": 69.2, - "ram_used_gb": 44.004215240478516, - "gpu_memory_used": 4041.0 - }, - { - "timestamp": "2024-12-31T03:13:16.097887", - "cpu_percent": 13.1, - "ram_percent": 69.2, - "ram_used_gb": 44.0260009765625, - "gpu_memory_used": 4042.0 - }, - { - "timestamp": "2024-12-31T03:13:16.171478", - "cpu_percent": 4.5, - "ram_percent": 69.2, - "ram_used_gb": 44.02027130126953, - "gpu_memory_used": 4042.0 - }, - { - "timestamp": "2024-12-31T03:13:23.044945", - "cpu_percent": 12.6, - "ram_percent": 69.2, - "ram_used_gb": 44.03746795654297, - "gpu_memory_used": 4044.0 - }, - { - "timestamp": "2024-12-31T03:13:23.127442", - "cpu_percent": 8.3, - "ram_percent": 69.2, - "ram_used_gb": 44.0373420715332, - "gpu_memory_used": 4044.0 - }, - { - "timestamp": "2024-12-31T03:13:36.780309", - "cpu_percent": 12.5, - "ram_percent": 69.2, - "ram_used_gb": 44.00790786743164, - "gpu_memory_used": 4034.0 - }, - { - "timestamp": "2024-12-31T03:13:36.853474", - "cpu_percent": 6.2, - "ram_percent": 69.2, - "ram_used_gb": 44.00779724121094, - "gpu_memory_used": 4034.0 - }, - { - "timestamp": "2024-12-31T03:13:57.449274", - "cpu_percent": 12.4, - "ram_percent": 69.2, - "ram_used_gb": 44.0432243347168, - "gpu_memory_used": 4034.0 - }, - { - "timestamp": "2024-12-31T03:13:57.524592", - "cpu_percent": 6.2, - "ram_percent": 69.2, - "ram_used_gb": 44.03204345703125, - "gpu_memory_used": 4034.0 - }, - { - "timestamp": "2024-12-31T03:14:24.698822", - "cpu_percent": 13.4, - "ram_percent": 69.5, - "ram_used_gb": 44.18327331542969, - "gpu_memory_used": 4480.0 - }, - { - "timestamp": "2024-12-31T03:14:24.783683", - "cpu_percent": 4.2, - "ram_percent": 69.5, - "ram_used_gb": 44.182212829589844, - "gpu_memory_used": 4480.0 - }, - { - "timestamp": "2024-12-31T03:14:58.242642", - "cpu_percent": 12.8, - "ram_percent": 69.5, - "ram_used_gb": 44.20225524902344, - "gpu_memory_used": 4476.0 - }, - { - "timestamp": "2024-12-31T03:14:58.310907", - "cpu_percent": 2.9, - "ram_percent": 69.5, - "ram_used_gb": 44.19659423828125, - "gpu_memory_used": 4476.0 - }, - { - "timestamp": "2024-12-31T03:15:42.196813", - "cpu_percent": 14.3, - "ram_percent": 69.9, - "ram_used_gb": 44.43781661987305, - "gpu_memory_used": 4494.0 - }, - { - "timestamp": "2024-12-31T03:15:42.288427", - "cpu_percent": 13.7, - "ram_percent": 69.9, - "ram_used_gb": 44.439701080322266, - "gpu_memory_used": 4494.0 - }, - { - "timestamp": "2024-12-31T03:16:35.483849", - "cpu_percent": 14.7, - "ram_percent": 65.0, - "ram_used_gb": 41.35385513305664, - "gpu_memory_used": 4506.0 - }, - { - "timestamp": "2024-12-31T03:16:35.626628", - "cpu_percent": 32.9, - "ram_percent": 65.0, - "ram_used_gb": 41.34442138671875, - "gpu_memory_used": 4506.0 - }, - { - "timestamp": "2024-12-31T03:17:29.378353", - "cpu_percent": 13.4, - "ram_percent": 64.3, - "ram_used_gb": 40.8721809387207, - "gpu_memory_used": 4485.0 - }, - { - "timestamp": "2024-12-31T03:17:29.457464", - "cpu_percent": 5.1, - "ram_percent": 64.3, - "ram_used_gb": 40.875389099121094, - "gpu_memory_used": 4485.0 - }, - { - "timestamp": "2024-12-31T03:18:31.955862", - "cpu_percent": 14.3, - "ram_percent": 65.0, - "ram_used_gb": 41.360206604003906, - "gpu_memory_used": 4484.0 - }, - { - "timestamp": "2024-12-31T03:18:32.038999", - "cpu_percent": 12.5, - "ram_percent": 65.0, - "ram_used_gb": 41.37223434448242, - "gpu_memory_used": 4484.0 - }, - { - "timestamp": "2024-12-31T03:19:46.454105", - "cpu_percent": 13.9, - "ram_percent": 65.3, - "ram_used_gb": 41.562198638916016, - "gpu_memory_used": 4487.0 - }, - { - "timestamp": "2024-12-31T03:19:46.524303", - "cpu_percent": 6.8, - "ram_percent": 65.3, - "ram_used_gb": 41.56681442260742, - "gpu_memory_used": 4487.0 - }, - { - "timestamp": "2024-12-31T03:21:25.251452", - "cpu_percent": 23.7, - "ram_percent": 62.0, - "ram_used_gb": 39.456459045410156, - "gpu_memory_used": 4488.0 - }, - { - "timestamp": "2024-12-31T03:21:25.348643", - "cpu_percent": 2.9, - "ram_percent": 62.0, - "ram_used_gb": 39.454288482666016, - "gpu_memory_used": 4487.0 - }, - { - "timestamp": "2024-12-31T03:22:53.939896", - "cpu_percent": 12.9, - "ram_percent": 62.1, - "ram_used_gb": 39.50320053100586, - "gpu_memory_used": 4488.0 - }, - { - "timestamp": "2024-12-31T03:22:54.041607", - "cpu_percent": 8.3, - "ram_percent": 62.1, - "ram_used_gb": 39.49895095825195, - "gpu_memory_used": 4488.0 - }, - { - "timestamp": "2024-12-31T03:24:33.835432", - "cpu_percent": 12.9, - "ram_percent": 62.3, - "ram_used_gb": 39.647212982177734, - "gpu_memory_used": 4503.0 - }, - { - "timestamp": "2024-12-31T03:24:33.923914", - "cpu_percent": 7.6, - "ram_percent": 62.3, - "ram_used_gb": 39.64302062988281, - "gpu_memory_used": 4503.0 - }, - { - "timestamp": "2024-12-31T03:26:22.021598", - "cpu_percent": 12.9, - "ram_percent": 58.4, - "ram_used_gb": 37.162540435791016, - "gpu_memory_used": 4491.0 - }, - { - "timestamp": "2024-12-31T03:26:22.142138", - "cpu_percent": 12.0, - "ram_percent": 58.4, - "ram_used_gb": 37.162010192871094, - "gpu_memory_used": 4487.0 - }, - { - "timestamp": "2024-12-31T03:28:15.970365", - "cpu_percent": 15.0, - "ram_percent": 58.2, - "ram_used_gb": 37.04011535644531, - "gpu_memory_used": 4481.0 - }, - { - "timestamp": "2024-12-31T03:28:16.096459", - "cpu_percent": 12.4, - "ram_percent": 58.2, - "ram_used_gb": 37.035972595214844, - "gpu_memory_used": 4473.0 - }, - { - "timestamp": "2024-12-31T03:30:17.092257", - "cpu_percent": 12.4, - "ram_percent": 58.4, - "ram_used_gb": 37.14639663696289, - "gpu_memory_used": 4459.0 - } - ] -} \ No newline at end of file diff --git a/examples/benchmarks/benchmark_stats.txt b/examples/benchmarks/benchmark_stats.txt deleted file mode 100644 index c2a9b02..0000000 --- a/examples/benchmarks/benchmark_stats.txt +++ /dev/null @@ -1,19 +0,0 @@ -=== Benchmark Statistics === - -Overall Stats: -Total tokens processed: 140500 -Total audio generated: 43469.18s -Total test duration: 1061.20s -Average processing rate: 137.67 tokens/second -Average realtime factor: 42.93x - -Per-chunk Stats: -Average chunk size: 5620.00 tokens -Min chunk size: 100.00 tokens -Max chunk size: 16000.00 tokens -Average processing time: 41.13s -Average output length: 1738.77s - -Performance Ranges: -Processing rate range: 11.70 - 155.99 tokens/second -Realtime factor range: 3.65x - 49.46x diff --git a/examples/benchmarks/benchmark_tts.py b/examples/benchmarks/benchmark_tts.py deleted file mode 100644 index f17e6ee..0000000 --- a/examples/benchmarks/benchmark_tts.py +++ /dev/null @@ -1,406 +0,0 @@ -import os -import json -import time -import subprocess -from datetime import datetime - -import pandas as pd -import psutil -import seaborn as sns -import requests -import tiktoken -import scipy.io.wavfile as wavfile -import matplotlib.pyplot as plt - -enc = tiktoken.get_encoding("cl100k_base") - - -def setup_plot(fig, ax, title): - """Configure plot styling""" - # Improve grid - ax.grid(True, linestyle="--", alpha=0.3, color="#ffffff") - - # Set title and labels with better fonts - ax.set_title(title, pad=20, fontsize=16, fontweight="bold", color="#ffffff") - ax.set_xlabel(ax.get_xlabel(), fontsize=14, fontweight="medium", color="#ffffff") - ax.set_ylabel(ax.get_ylabel(), fontsize=14, fontweight="medium", color="#ffffff") - - # Improve tick labels - ax.tick_params(labelsize=12, colors="#ffffff") - - # Style spines - for spine in ax.spines.values(): - spine.set_color("#ffffff") - spine.set_alpha(0.3) - spine.set_linewidth(0.5) - - # Set background colors - ax.set_facecolor("#1a1a2e") - fig.patch.set_facecolor("#1a1a2e") - - return fig, ax - - -def get_text_for_tokens(text: str, num_tokens: int) -> str: - """Get a slice of text that contains exactly num_tokens tokens""" - tokens = enc.encode(text) - if num_tokens > len(tokens): - return text - return enc.decode(tokens[:num_tokens]) - - -def get_audio_length(audio_data: bytes) -> float: - """Get audio length in seconds from bytes data""" - # Save to a temporary file - temp_path = "examples/benchmarks/output/temp.wav" - os.makedirs(os.path.dirname(temp_path), exist_ok=True) - with open(temp_path, "wb") as f: - f.write(audio_data) - - # Read the audio file - try: - rate, data = wavfile.read(temp_path) - return len(data) / rate - finally: - # Clean up temp file - if os.path.exists(temp_path): - os.remove(temp_path) - - -def get_gpu_memory(): - """Get GPU memory usage using nvidia-smi""" - try: - result = subprocess.check_output( - ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,nounits,noheader"] - ) - return float(result.decode("utf-8").strip()) - except (subprocess.CalledProcessError, FileNotFoundError): - return None - - -def get_system_metrics(): - """Get current system metrics""" - metrics = { - "timestamp": datetime.now().isoformat(), - "cpu_percent": psutil.cpu_percent(), - "ram_percent": psutil.virtual_memory().percent, - "ram_used_gb": psutil.virtual_memory().used / (1024**3), - } - - gpu_mem = get_gpu_memory() - if gpu_mem is not None: - metrics["gpu_memory_used"] = gpu_mem - - return metrics - - -def make_tts_request(text: str, timeout: int = 120) -> tuple[float, float]: - """Make TTS request using OpenAI-compatible endpoint and return processing time and output length""" - try: - start_time = time.time() - - # Make request to OpenAI-compatible endpoint - response = requests.post( - "http://localhost:8880/v1/audio/speech", - json={ - "model": "kokoro", - "input": text, - "voice": "af", - "response_format": "wav", - }, - timeout=timeout, - ) - response.raise_for_status() - - processing_time = time.time() - start_time - audio_length = get_audio_length(response.content) - - # Save the audio file - token_count = len(enc.encode(text)) - output_file = f"examples/benchmarks/output/chunk_{token_count}_tokens.wav" - os.makedirs(os.path.dirname(output_file), exist_ok=True) - with open(output_file, "wb") as f: - f.write(response.content) - print(f"Saved audio to {output_file}") - - return processing_time, audio_length - - except requests.exceptions.RequestException as e: - print(f"Error making request for text: {text[:50]}... Error: {str(e)}") - return None, None - except Exception as e: - print(f"Error processing text: {text[:50]}... Error: {str(e)}") - return None, None - - -def plot_system_metrics(metrics_data): - """Create plots for system metrics over time""" - df = pd.DataFrame(metrics_data) - df["timestamp"] = pd.to_datetime(df["timestamp"]) - elapsed_time = (df["timestamp"] - df["timestamp"].iloc[0]).dt.total_seconds() - - # Get baseline values (first measurement) - baseline_cpu = df["cpu_percent"].iloc[0] - baseline_ram = df["ram_used_gb"].iloc[0] - baseline_gpu = ( - df["gpu_memory_used"].iloc[0] / 1024 - if "gpu_memory_used" in df.columns - else None - ) # Convert MB to GB - - # Convert GPU memory to GB - if "gpu_memory_used" in df.columns: - df["gpu_memory_gb"] = df["gpu_memory_used"] / 1024 - - # Set plotting style - plt.style.use("dark_background") - - # Create figure with 3 subplots (or 2 if no GPU) - has_gpu = "gpu_memory_used" in df.columns - num_plots = 3 if has_gpu else 2 - fig, axes = plt.subplots(num_plots, 1, figsize=(15, 5 * num_plots)) - fig.patch.set_facecolor("#1a1a2e") - - # Apply rolling average for smoothing - window = min(5, len(df) // 2) # Smaller window for smoother lines - - # Plot 1: CPU Usage - smoothed_cpu = df["cpu_percent"].rolling(window=window, center=True).mean() - sns.lineplot( - x=elapsed_time, y=smoothed_cpu, ax=axes[0], color="#ff2a6d", linewidth=2 - ) - axes[0].axhline( - y=baseline_cpu, color="#05d9e8", linestyle="--", alpha=0.5, label="Baseline" - ) - axes[0].set_xlabel("Time (seconds)", fontsize=14) - axes[0].set_ylabel("CPU Usage (%)", fontsize=14) - axes[0].tick_params(labelsize=12) - axes[0].set_title("CPU Usage Over Time", pad=20, fontsize=16, fontweight="bold") - axes[0].set_ylim(0, max(df["cpu_percent"]) * 1.1) # Add 10% padding - axes[0].legend() - - # Plot 2: RAM Usage - smoothed_ram = df["ram_used_gb"].rolling(window=window, center=True).mean() - sns.lineplot( - x=elapsed_time, y=smoothed_ram, ax=axes[1], color="#05d9e8", linewidth=2 - ) - axes[1].axhline( - y=baseline_ram, color="#ff2a6d", linestyle="--", alpha=0.5, label="Baseline" - ) - axes[1].set_xlabel("Time (seconds)", fontsize=14) - axes[1].set_ylabel("RAM Usage (GB)", fontsize=14) - axes[1].tick_params(labelsize=12) - axes[1].set_title("RAM Usage Over Time", pad=20, fontsize=16, fontweight="bold") - axes[1].set_ylim(0, max(df["ram_used_gb"]) * 1.1) # Add 10% padding - axes[1].legend() - - # Plot 3: GPU Memory (if available) - if has_gpu: - smoothed_gpu = df["gpu_memory_gb"].rolling(window=window, center=True).mean() - sns.lineplot( - x=elapsed_time, y=smoothed_gpu, ax=axes[2], color="#ff2a6d", linewidth=2 - ) - axes[2].axhline( - y=baseline_gpu, color="#05d9e8", linestyle="--", alpha=0.5, label="Baseline" - ) - axes[2].set_xlabel("Time (seconds)", fontsize=14) - axes[2].set_ylabel("GPU Memory (GB)", fontsize=14) - axes[2].tick_params(labelsize=12) - axes[2].set_title( - "GPU Memory Usage Over Time", pad=20, fontsize=16, fontweight="bold" - ) - axes[2].set_ylim(0, max(df["gpu_memory_gb"]) * 1.1) # Add 10% padding - axes[2].legend() - - # Style all subplots - for ax in axes: - ax.grid(True, linestyle="--", alpha=0.3) - ax.set_facecolor("#1a1a2e") - for spine in ax.spines.values(): - spine.set_color("#ffffff") - spine.set_alpha(0.3) - - plt.tight_layout() - plt.savefig("examples/benchmarks/system_usage.png", dpi=300, bbox_inches="tight") - plt.close() - - -def main(): - # Create output directory - os.makedirs("examples/benchmarks/output", exist_ok=True) - - # Read input text - with open( - "examples/benchmarks/the_time_machine_hg_wells.txt", "r", encoding="utf-8" - ) as f: - text = f.read() - - # Get total tokens in file - total_tokens = len(enc.encode(text)) - print(f"Total tokens in file: {total_tokens}") - - # Generate token sizes with dense sampling at start and increasing intervals - dense_range = list(range(100, 1001, 100)) - current = max(dense_range) - large_range = [] - while current <= total_tokens: - large_range.append(current) - current += 1000 - - token_sizes = sorted(list(set(dense_range + large_range))) - print(f"Testing sizes: {token_sizes}") - - # Process chunks - results = [] - system_metrics = [] - test_start_time = time.time() - - for num_tokens in token_sizes: - # Get text slice with exact token count - chunk = get_text_for_tokens(text, num_tokens) - actual_tokens = len(enc.encode(chunk)) - - print(f"\nProcessing chunk with {actual_tokens} tokens:") - print(f"Text preview: {chunk[:100]}...") - - # Collect system metrics before processing - system_metrics.append(get_system_metrics()) - - processing_time, audio_length = make_tts_request(chunk) - if processing_time is None or audio_length is None: - print("Breaking loop due to error") - break - - # Collect system metrics after processing - system_metrics.append(get_system_metrics()) - - results.append( - { - "tokens": actual_tokens, - "processing_time": processing_time, - "output_length": audio_length, - "realtime_factor": audio_length / processing_time, - "elapsed_time": time.time() - test_start_time, - } - ) - - # Save intermediate results - with open("examples/benchmarks/benchmark_results.json", "w") as f: - json.dump( - {"results": results, "system_metrics": system_metrics}, f, indent=2 - ) - - # Create DataFrame and calculate stats - df = pd.DataFrame(results) - if df.empty: - print("No data to plot") - return - - # Calculate useful metrics - df["tokens_per_second"] = df["tokens"] / df["processing_time"] - - # Write detailed stats - with open("examples/benchmarks/benchmark_stats.txt", "w") as f: - f.write("=== Benchmark Statistics ===\n\n") - - f.write("Overall Stats:\n") - f.write(f"Total tokens processed: {df['tokens'].sum()}\n") - f.write(f"Total audio generated: {df['output_length'].sum():.2f}s\n") - f.write(f"Total test duration: {df['elapsed_time'].max():.2f}s\n") - f.write( - f"Average processing rate: {df['tokens_per_second'].mean():.2f} tokens/second\n" - ) - f.write(f"Average realtime factor: {df['realtime_factor'].mean():.2f}x\n\n") - - f.write("Per-chunk Stats:\n") - f.write(f"Average chunk size: {df['tokens'].mean():.2f} tokens\n") - f.write(f"Min chunk size: {df['tokens'].min():.2f} tokens\n") - f.write(f"Max chunk size: {df['tokens'].max():.2f} tokens\n") - f.write(f"Average processing time: {df['processing_time'].mean():.2f}s\n") - f.write(f"Average output length: {df['output_length'].mean():.2f}s\n\n") - - f.write("Performance Ranges:\n") - f.write( - f"Processing rate range: {df['tokens_per_second'].min():.2f} - {df['tokens_per_second'].max():.2f} tokens/second\n" - ) - f.write( - f"Realtime factor range: {df['realtime_factor'].min():.2f}x - {df['realtime_factor'].max():.2f}x\n" - ) - - # Set plotting style - plt.style.use("dark_background") - - # Plot 1: Processing Time vs Token Count - fig, ax = plt.subplots(figsize=(12, 8)) - sns.scatterplot( - data=df, x="tokens", y="processing_time", s=100, alpha=0.6, color="#ff2a6d" - ) - sns.regplot( - data=df, - x="tokens", - y="processing_time", - scatter=False, - color="#05d9e8", - line_kws={"linewidth": 2}, - ) - corr = df["tokens"].corr(df["processing_time"]) - plt.text( - 0.05, - 0.95, - f"Correlation: {corr:.2f}", - transform=ax.transAxes, - fontsize=10, - color="#ffffff", - bbox=dict(facecolor="#1a1a2e", edgecolor="#ffffff", alpha=0.7), - ) - setup_plot(fig, ax, "Processing Time vs Input Size") - ax.set_xlabel("Number of Input Tokens") - ax.set_ylabel("Processing Time (seconds)") - plt.savefig("examples/benchmarks/processing_time.png", dpi=300, bbox_inches="tight") - plt.close() - - # Plot 2: Realtime Factor vs Token Count - fig, ax = plt.subplots(figsize=(12, 8)) - sns.scatterplot( - data=df, x="tokens", y="realtime_factor", s=100, alpha=0.6, color="#ff2a6d" - ) - sns.regplot( - data=df, - x="tokens", - y="realtime_factor", - scatter=False, - color="#05d9e8", - line_kws={"linewidth": 2}, - ) - corr = df["tokens"].corr(df["realtime_factor"]) - plt.text( - 0.05, - 0.95, - f"Correlation: {corr:.2f}", - transform=ax.transAxes, - fontsize=10, - color="#ffffff", - bbox=dict(facecolor="#1a1a2e", edgecolor="#ffffff", alpha=0.7), - ) - setup_plot(fig, ax, "Realtime Factor vs Input Size") - ax.set_xlabel("Number of Input Tokens") - ax.set_ylabel("Realtime Factor (output length / processing time)") - plt.savefig("examples/benchmarks/realtime_factor.png", dpi=300, bbox_inches="tight") - plt.close() - - # Plot system metrics - plot_system_metrics(system_metrics) - - print("\nResults saved to:") - print("- examples/benchmarks/benchmark_results.json") - print("- examples/benchmarks/benchmark_stats.txt") - print("- examples/benchmarks/processing_time.png") - print("- examples/benchmarks/realtime_factor.png") - print("- examples/benchmarks/system_usage.png") - if any("gpu_memory_used" in m for m in system_metrics): - print("- examples/benchmarks/gpu_usage.png") - print("\nAudio files saved in examples/benchmarks/output/") - - -if __name__ == "__main__": - main() diff --git a/examples/benchmarks/processing_time.png b/examples/benchmarks/processing_time.png deleted file mode 100644 index c66fcaf..0000000 Binary files a/examples/benchmarks/processing_time.png and /dev/null differ diff --git a/examples/benchmarks/realtime_factor.png b/examples/benchmarks/realtime_factor.png deleted file mode 100644 index 249685b..0000000 Binary files a/examples/benchmarks/realtime_factor.png and /dev/null differ diff --git a/examples/benchmarks/system_usage.png b/examples/benchmarks/system_usage.png deleted file mode 100644 index bc10eb0..0000000 Binary files a/examples/benchmarks/system_usage.png and /dev/null differ