diff --git a/Dockerfile b/Dockerfile index a58ced6..e06d314 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,3 @@ -# Stage 1: Clone model repository -FROM alpine/git:latest AS model_layer -ARG KOKORO_REPO=https://huggingface.co/hexgrad/Kokoro-82M -ARG KOKORO_COMMIT=a67f11354c3e38c58c3327498bc4bd1e57e71c50 - -RUN git lfs install --skip-repo -WORKDIR /app/Kokoro-82M -RUN GIT_LFS_SKIP_SMUDGE=1 git clone ${KOKORO_REPO} . && \ - git checkout ${KOKORO_COMMIT} && \ - git lfs pull && \ - ls -la - -# Stage 2: Build FROM nvidia/cuda:12.1.0-base-ubuntu22.04 # Install base system dependencies @@ -30,28 +17,27 @@ RUN pip3 install --no-cache-dir torch==2.5.1 --extra-index-url https://download. COPY requirements.txt . RUN pip3 install --no-cache-dir -r requirements.txt +# Copy application code and model +COPY . /app/ + # Set working directory WORKDIR /app - # Run with Python unbuffered output for live logging ENV PYTHONUNBUFFERED=1 -# Copy model files from git clone stage -COPY --from=model_layer /app/Kokoro-82M /app/Kokoro-82M - # Create non-root user RUN useradd -m -u 1000 appuser -# Create and set permissions for output directory -RUN mkdir -p /app/api/src/output && \ - chown -R appuser:appuser /app/api/src/output - -# Set Python path (app first for our imports, then model dir for model imports) -ENV PYTHONPATH=/app:/app/Kokoro-82M +# Create directories and set permissions +RUN mkdir -p /app/Kokoro-82M && \ + chown -R appuser:appuser /app # Switch to non-root user USER appuser +# Set Python path (app first for our imports, then model dir for model imports) +ENV PYTHONPATH=/app:/app/Kokoro-82M + # Run FastAPI server with debug logging and reload CMD ["uvicorn", "api.src.main:app", "--host", "0.0.0.0", "--port", "8880", "--log-level", "debug"] diff --git a/README.md b/README.md index 35309e9..a40da19 100644 --- a/README.md +++ b/README.md @@ -3,81 +3,114 @@

# Kokoro TTS API -[![Model Commit](https://img.shields.io/badge/model--commit-a67f113-blue)](https://huggingface.co/hexgrad/Kokoro-82M/tree/a67f11354c3e38c58c3327498bc4bd1e57e71c50) +[![Model Commit](https://img.shields.io/badge/model--commit-8228a35-blue)](https://huggingface.co/hexgrad/Kokoro-82M/tree/8228a351f87c8a6076502c1e3b7e72e821ebec9a) +[![Tests](https://img.shields.io/badge/tests-33%20passed-darkgreen)]() +[![Coverage](https://img.shields.io/badge/coverage-97%25-darkgreen)]() FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model. -Dockerized with NVIDIA GPU support, simple queue handling via sqllite, and automatic chunking/stitching on lengthy input/outputs +OpenAI-compatible API with NVIDIA GPU support, automatic chunking/stitching for long texts, and very fast generation time (~35-49x RTF) ## Quick Start +1. Install prerequisites: + - Install [Docker Desktop](https://www.docker.com/products/docker-desktop/) + - Install [Git](https://git-scm.com/downloads) (or download and extract zip) + +2. 
Clone and run: ```bash +# Clone repository +git clone https://github.com/remsky/Kokoro-FastAPI.git +cd Kokoro-FastAPI + # Start the API (will automatically clone source HF repo via git-lfs) docker compose up --build ``` -Test it out: +Test all voices: ```bash -# From host terminal -python examples/test_tts.py "Hello world" --voice af_bella +python examples/test_all_voices.py +``` + +Test OpenAI compatibility: +```bash +python examples/test_openai_tts.py +``` + +## OpenAI-Compatible API + +List available voices: +```python +import requests + +response = requests.get("http://localhost:8880/v1/audio/voices") +voices = response.json()["voices"] +``` + +Generate speech: +```python +import requests + +response = requests.post( + "http://localhost:8880/v1/audio/speech", + json={ + "model": "kokoro", # Not used but required for compatibility + "input": "Hello world!", + "voice": "af_bella", + "response_format": "mp3", # Supported: mp3, wav, opus, flac, aac + "speed": 1.0 + } +) + +# Save audio +with open("output.mp3", "wb") as f: + f.write(response.content) +``` + +Using OpenAI's Python library: +```python +from openai import OpenAI + +client = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed") + +response = client.audio.speech.create( + model="kokoro", # Not used but required for compatibility, also accepts library defaults + voice="af_bella", + input="Hello world!", + response_format="mp3" +) + +response.stream_to_file("output.mp3") ``` ## Performance Benchmarks -Benchmarking was performed solely on generation via the local API (ignoring file transfers) using various text lengths up to ~10 minutes output, measuring processing time, token count, and output audio length. Tests were run on: +Benchmarking was performed on generation via the local API using text lengths up to feature-length books (~1.5 hours output), measuring processing time and realtime factor. 
Tests were run on: - Windows 11 Home w/ WSL2 - NVIDIA 4060Ti 16gb GPU @ CUDA 12.1 - 11th Gen i7-11700 @ 2.5GHz - 64gb RAM -- Randomized chunks from H.G. Wells - The Time Machine +- H.G. Wells - The Time Machine (full text)

- Processing Time vs Output Length - Processing Time vs Token Count + Processing Time + Realtime Factor

+Key Performance Metrics: +- Realtime Factor: Ranges between 35-49x (output audio length to generation time) +- Average Processing Rate: 137.67 tokens/second +- Efficient Scaling: Maintains performance with long texts through automatic chunking +- Natural Boundary Detection: Automatically splits and stitches at sentence boundaries to prevent artifacts -- Average processing speed: ~3.4 seconds per minute of audio output -- Efficient token processing: ~0.01 seconds per token -- Scales well with longer texts, maintains consistent performance +## Features -## API Endpoints - -```bash -GET /tts/voices # List available voices -POST /tts # Generate speech -GET /tts/{request_id} # Check generation status -GET /tts/file/{request_id} # Download audio file -``` - -## Example Usage - -List available voices: -```bash -python examples/test_tts.py -``` - -Generate speech: -```bash -# Default voice -python examples/test_tts.py "Your text here" - -# Specific voice -python examples/test_tts.py --voice af_bella "Your text here" - -# Get file path without downloading -python examples/test_tts.py --no-download "Your text here" -``` - -Generated files are saved in: -- With download: `examples/output/` -- Without download: `src/output/` (in API container) - -## Requirements - -- Docker -- NVIDIA GPU + CUDA -- nvidia-container-toolkit installed on host +- OpenAI-compatible API endpoints +- Multiple audio formats: mp3, wav, opus, flac, aac +- Automatic text chunking and audio stitching +- GPU-accelerated inference +- Queue handling via SQLite +- Progress tracking for long generations ## Model diff --git a/api/src/services/tts.py b/api/src/services/tts.py index 76b83cc..7224b0e 100644 --- a/api/src/services/tts.py +++ b/api/src/services/tts.py @@ -92,7 +92,7 @@ class TTSService: # Validate phonemization first ps = phonemize(chunk, voice[0]) tokens = tokenize(ps) - logger.info( + logger.debug( f"Processing chunk {i+1}/{len(chunks)}: {len(tokens)} tokens" ) diff --git 
a/api/src/structures/schemas.py b/api/src/structures/schemas.py index 5a36e4a..2031e97 100644 --- a/api/src/structures/schemas.py +++ b/api/src/structures/schemas.py @@ -26,6 +26,7 @@ class OpenAISpeechRequest(BaseModel): "bf_emma", "af_sarah", "af_bella", + "af_nicole", ] = Field(default="af", description="The voice to use for generation") response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] = Field( default="mp3", diff --git a/docker-compose.yml b/docker-compose.yml index 1d2ae89..aacb121 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,11 +1,33 @@ services: + model-fetcher: + image: datamachines/git-lfs:latest + volumes: + - ./Kokoro-82M:/app/Kokoro-82M + working_dir: /app/Kokoro-82M + command: > + sh -c " + if [ -z \"$(ls -A .)\" ]; then + git clone https://huggingface.co/hexgrad/Kokoro-82M . && \ + git checkout 8228a351f87c8a6076502c1e3b7e72e821ebec9a; + touch .cloned; + else + touch .cloned; + fi; + tail -f /dev/null + " + healthcheck: + test: ["CMD", "test", "-f", ".cloned"] + interval: 1s + timeout: 1s + retries: 120 + start_period: 1s + kokoro-tts: build: context: . 
volumes: - ./api/src:/app/api/src - ./Kokoro-82M:/app/Kokoro-82M - - ./api/src/output:/app/api/src/output ports: - "8880:8880" environment: @@ -17,3 +39,6 @@ services: - driver: nvidia count: 1 capabilities: [gpu] + depends_on: + model-fetcher: + condition: service_healthy diff --git a/examples/benchmarks/benchmark_results.json b/examples/benchmarks/benchmark_results.json index c72f896..373cebe 100644 --- a/examples/benchmarks/benchmark_results.json +++ b/examples/benchmarks/benchmark_results.json @@ -2,278 +2,530 @@ "results": [ { "tokens": 100, - "processing_time": 0.965601921081543, + "processing_time": 8.54442310333252, "output_length": 31.15, - "realtime_factor": 32.25967069857295, - "elapsed_time": 1.1258533000946045 + "realtime_factor": 3.6456527987068887, + "elapsed_time": 8.720048666000366 }, { "tokens": 200, - "processing_time": 1.2544827461242676, + "processing_time": 1.3838517665863037, "output_length": 62.6, - "realtime_factor": 49.901045027046486, - "elapsed_time": 2.52616286277771 + "realtime_factor": 45.236058883981606, + "elapsed_time": 10.258155345916748 }, { "tokens": 300, - "processing_time": 2.121187686920166, - "output_length": 96.475, - "realtime_factor": 45.48159533213006, - "elapsed_time": 4.830777883529663 + "processing_time": 2.2024788856506348, + "output_length": 96.325, + "realtime_factor": 43.73481200095347, + "elapsed_time": 12.594647407531738 }, { "tokens": 400, - "processing_time": 2.715940475463867, - "output_length": 128.675, - "realtime_factor": 47.3776951897383, - "elapsed_time": 7.719157934188843 + "processing_time": 3.175424098968506, + "output_length": 128.55, + "realtime_factor": 40.48278150995886, + "elapsed_time": 16.005898475646973 }, { "tokens": 500, - "processing_time": 3.1464896202087402, - "output_length": 158.775, - "realtime_factor": 50.46099595569833, - "elapsed_time": 11.027976274490356 + "processing_time": 3.205301523208618, + "output_length": 158.55, + "realtime_factor": 49.46492517224587, + "elapsed_time": 
19.377076625823975 }, { - "tokens": 750, - "processing_time": 5.136769533157349, - "output_length": 237.25, - "realtime_factor": 46.18661562847511, - "elapsed_time": 16.39173412322998 + "tokens": 600, + "processing_time": 3.9976348876953125, + "output_length": 189.225, + "realtime_factor": 47.33423769700254, + "elapsed_time": 23.568575859069824 + }, + { + "tokens": 700, + "processing_time": 4.98036003112793, + "output_length": 222.05, + "realtime_factor": 44.58513011351734, + "elapsed_time": 28.767319917678833 + }, + { + "tokens": 800, + "processing_time": 5.156893491744995, + "output_length": 253.825, + "realtime_factor": 49.22052402406907, + "elapsed_time": 34.1369092464447 + }, + { + "tokens": 900, + "processing_time": 5.8110880851745605, + "output_length": 283.75, + "realtime_factor": 48.82906537312906, + "elapsed_time": 40.16419458389282 }, { "tokens": 1000, - "processing_time": 6.8030219078063965, - "output_length": 315.575, - "realtime_factor": 46.38747372515161, - "elapsed_time": 23.391889572143555 - }, - { - "tokens": 1500, - "processing_time": 9.774210453033447, - "output_length": 470.75, - "realtime_factor": 48.16245795627428, - "elapsed_time": 33.43451166152954 + "processing_time": 6.686216354370117, + "output_length": 315.45, + "realtime_factor": 47.17914935460046, + "elapsed_time": 47.11375427246094 }, { "tokens": 2000, - "processing_time": 15.099190711975098, - "output_length": 625.625, - "realtime_factor": 41.43433988841665, - "elapsed_time": 48.88647747039795 + "processing_time": 13.290695905685425, + "output_length": 624.925, + "realtime_factor": 47.01973504131358, + "elapsed_time": 60.842002630233765 }, { "tokens": 3000, - "processing_time": 19.526690244674683, - "output_length": 932.775, - "realtime_factor": 47.76923217975388, - "elapsed_time": 68.92684745788574 + "processing_time": 20.058005571365356, + "output_length": 932.05, + "realtime_factor": 46.46773063671828, + "elapsed_time": 81.50969815254211 }, { "tokens": 4000, - "processing_time": 
27.7449471950531, - "output_length": 1224.2, - "realtime_factor": 44.12334942984767, - "elapsed_time": 97.2753164768219 + "processing_time": 26.38338828086853, + "output_length": 1222.975, + "realtime_factor": 46.353978002394015, + "elapsed_time": 108.76348638534546 + }, + { + "tokens": 5000, + "processing_time": 32.472310066223145, + "output_length": 1525.15, + "realtime_factor": 46.967708699801484, + "elapsed_time": 142.2994668483734 + }, + { + "tokens": 6000, + "processing_time": 42.67592263221741, + "output_length": 1837.525, + "realtime_factor": 43.0576514030137, + "elapsed_time": 186.26759266853333 + }, + { + "tokens": 7000, + "processing_time": 51.601537466049194, + "output_length": 2146.875, + "realtime_factor": 41.60486499869347, + "elapsed_time": 239.59922289848328 }, { "tokens": 8000, - "processing_time": 58.24176383018494, - "output_length": 2459.625, - "realtime_factor": 42.23129311762449, - "elapsed_time": 156.6953580379486 + "processing_time": 51.86434292793274, + "output_length": 2458.425, + "realtime_factor": 47.401063258741466, + "elapsed_time": 293.4462616443634 + }, + { + "tokens": 9000, + "processing_time": 60.4497971534729, + "output_length": 2772.1, + "realtime_factor": 45.857887545297416, + "elapsed_time": 356.02399826049805 + }, + { + "tokens": 10000, + "processing_time": 71.75962543487549, + "output_length": 3085.625, + "realtime_factor": 42.99945800024164, + "elapsed_time": 430.50863671302795 + }, + { + "tokens": 11000, + "processing_time": 96.66409230232239, + "output_length": 3389.3, + "realtime_factor": 35.062657904030935, + "elapsed_time": 529.3296246528625 + }, + { + "tokens": 12000, + "processing_time": 85.70126295089722, + "output_length": 3703.175, + "realtime_factor": 43.21027336693678, + "elapsed_time": 618.0248212814331 + }, + { + "tokens": 13000, + "processing_time": 97.2874686717987, + "output_length": 4030.825, + "realtime_factor": 41.43210893479068, + "elapsed_time": 717.9070522785187 + }, + { + "tokens": 14000, + 
"processing_time": 105.1045708656311, + "output_length": 4356.775, + "realtime_factor": 41.451812838566596, + "elapsed_time": 826.1140224933624 + }, + { + "tokens": 15000, + "processing_time": 111.0716404914856, + "output_length": 4663.325, + "realtime_factor": 41.984839508672565, + "elapsed_time": 940.0645899772644 }, { "tokens": 16000, - "processing_time": 110.98782324790955, - "output_length": 4980.6, - "realtime_factor": 44.875193100012524, - "elapsed_time": 270.2029483318329 + "processing_time": 116.61742973327637, + "output_length": 4978.65, + "realtime_factor": 42.692160266154104, + "elapsed_time": 1061.1957621574402 } ], "system_metrics": [ { - "timestamp": "2024-12-31T01:04:14.249314", - "cpu_percent": 16.3, - "ram_percent": 49.7, - "ram_used_gb": 31.605762481689453, - "gpu_memory_used": 3861.0 + "timestamp": "2024-12-31T03:12:36.009478", + "cpu_percent": 8.1, + "ram_percent": 66.8, + "ram_used_gb": 42.47850799560547, + "gpu_memory_used": 2124.0 }, { - "timestamp": "2024-12-31T01:04:15.292017", - "cpu_percent": 16.0, - "ram_percent": 49.7, - "ram_used_gb": 31.584003448486328, - "gpu_memory_used": 3891.0 + "timestamp": "2024-12-31T03:12:44.639678", + "cpu_percent": 7.7, + "ram_percent": 69.1, + "ram_used_gb": 43.984352111816406, + "gpu_memory_used": 3486.0 }, { - "timestamp": "2024-12-31T01:04:15.365165", - "cpu_percent": 3.8, - "ram_percent": 49.6, - "ram_used_gb": 31.576663970947266, - "gpu_memory_used": 3891.0 + "timestamp": "2024-12-31T03:12:44.731107", + "cpu_percent": 8.3, + "ram_percent": 69.1, + "ram_used_gb": 43.97468948364258, + "gpu_memory_used": 3484.0 }, { - "timestamp": "2024-12-31T01:04:16.687140", - "cpu_percent": 17.7, - "ram_percent": 49.6, - "ram_used_gb": 31.56537628173828, - "gpu_memory_used": 3871.0 + "timestamp": "2024-12-31T03:12:46.189723", + "cpu_percent": 14.2, + "ram_percent": 69.1, + "ram_used_gb": 43.98275375366211, + "gpu_memory_used": 3697.0 }, { - "timestamp": "2024-12-31T01:04:16.773937", - "cpu_percent": 5.4, - 
"ram_percent": 49.6, - "ram_used_gb": 31.566661834716797, - "gpu_memory_used": 3871.0 + "timestamp": "2024-12-31T03:12:46.265437", + "cpu_percent": 4.7, + "ram_percent": 69.1, + "ram_used_gb": 43.982975006103516, + "gpu_memory_used": 3697.0 }, { - "timestamp": "2024-12-31T01:04:18.989053", + "timestamp": "2024-12-31T03:12:48.536216", + "cpu_percent": 12.5, + "ram_percent": 69.0, + "ram_used_gb": 43.86142349243164, + "gpu_memory_used": 3697.0 + }, + { + "timestamp": "2024-12-31T03:12:48.603827", + "cpu_percent": 6.2, + "ram_percent": 69.0, + "ram_used_gb": 43.8692626953125, + "gpu_memory_used": 3694.0 + }, + { + "timestamp": "2024-12-31T03:12:51.905764", + "cpu_percent": 14.2, + "ram_percent": 69.1, + "ram_used_gb": 43.93961715698242, + "gpu_memory_used": 3690.0 + }, + { + "timestamp": "2024-12-31T03:12:52.028178", + "cpu_percent": 26.0, + "ram_percent": 69.1, + "ram_used_gb": 43.944759368896484, + "gpu_memory_used": 3690.0 + }, + { + "timestamp": "2024-12-31T03:12:55.320709", + "cpu_percent": 13.2, + "ram_percent": 69.1, + "ram_used_gb": 43.943058013916016, + "gpu_memory_used": 3685.0 + }, + { + "timestamp": "2024-12-31T03:12:55.386582", + "cpu_percent": 3.2, + "ram_percent": 69.1, + "ram_used_gb": 43.9305419921875, + "gpu_memory_used": 3685.0 + }, + { + "timestamp": "2024-12-31T03:12:59.492304", "cpu_percent": 15.6, - "ram_percent": 49.7, - "ram_used_gb": 31.589736938476562, - "gpu_memory_used": 3864.0 + "ram_percent": 69.1, + "ram_used_gb": 43.964195251464844, + "gpu_memory_used": 4053.0 }, { - "timestamp": "2024-12-31T01:04:19.072702", - "cpu_percent": 7.3, - "ram_percent": 49.7, - "ram_used_gb": 31.584060668945312, - "gpu_memory_used": 3855.0 + "timestamp": "2024-12-31T03:12:59.586143", + "cpu_percent": 2.1, + "ram_percent": 69.1, + "ram_used_gb": 43.9642448425293, + "gpu_memory_used": 4053.0 }, { - "timestamp": "2024-12-31T01:04:21.875793", - "cpu_percent": 13.3, - "ram_percent": 49.6, - "ram_used_gb": 31.573482513427734, - "gpu_memory_used": 3892.0 + 
"timestamp": "2024-12-31T03:13:04.705286", + "cpu_percent": 12.0, + "ram_percent": 69.2, + "ram_used_gb": 43.992374420166016, + "gpu_memory_used": 4059.0 }, { - "timestamp": "2024-12-31T01:04:21.962946", + "timestamp": "2024-12-31T03:13:04.779475", + "cpu_percent": 4.7, + "ram_percent": 69.2, + "ram_used_gb": 43.9922981262207, + "gpu_memory_used": 4059.0 + }, + { + "timestamp": "2024-12-31T03:13:10.063292", + "cpu_percent": 12.4, + "ram_percent": 69.2, + "ram_used_gb": 44.004146575927734, + "gpu_memory_used": 4041.0 + }, + { + "timestamp": "2024-12-31T03:13:10.155395", + "cpu_percent": 6.8, + "ram_percent": 69.2, + "ram_used_gb": 44.004215240478516, + "gpu_memory_used": 4041.0 + }, + { + "timestamp": "2024-12-31T03:13:16.097887", + "cpu_percent": 13.1, + "ram_percent": 69.2, + "ram_used_gb": 44.0260009765625, + "gpu_memory_used": 4042.0 + }, + { + "timestamp": "2024-12-31T03:13:16.171478", "cpu_percent": 4.5, - "ram_percent": 49.6, - "ram_used_gb": 31.571670532226562, - "gpu_memory_used": 3882.0 + "ram_percent": 69.2, + "ram_used_gb": 44.02027130126953, + "gpu_memory_used": 4042.0 }, { - "timestamp": "2024-12-31T01:04:25.196632", - "cpu_percent": 14.5, - "ram_percent": 49.7, - "ram_used_gb": 31.587650299072266, - "gpu_memory_used": 3877.0 - }, - { - "timestamp": "2024-12-31T01:04:25.272481", - "cpu_percent": 5.1, - "ram_percent": 49.7, - "ram_used_gb": 31.589813232421875, - "gpu_memory_used": 3877.0 - }, - { - "timestamp": "2024-12-31T01:04:30.563097", - "cpu_percent": 12.8, - "ram_percent": 49.5, - "ram_used_gb": 31.491657257080078, - "gpu_memory_used": 4174.0 - }, - { - "timestamp": "2024-12-31T01:04:30.631986", - "cpu_percent": 3.9, - "ram_percent": 49.5, - "ram_used_gb": 31.508201599121094, - "gpu_memory_used": 4174.0 - }, - { - "timestamp": "2024-12-31T01:04:37.566460", + "timestamp": "2024-12-31T03:13:23.044945", "cpu_percent": 12.6, - "ram_percent": 49.5, - "ram_used_gb": 31.482032775878906, - "gpu_memory_used": 4170.0 + "ram_percent": 69.2, + "ram_used_gb": 
44.03746795654297, + "gpu_memory_used": 4044.0 }, { - "timestamp": "2024-12-31T01:04:37.633258", - "cpu_percent": 5.7, - "ram_percent": 49.5, - "ram_used_gb": 31.497997283935547, - "gpu_memory_used": 4170.0 + "timestamp": "2024-12-31T03:13:23.127442", + "cpu_percent": 8.3, + "ram_percent": 69.2, + "ram_used_gb": 44.0373420715332, + "gpu_memory_used": 4044.0 }, { - "timestamp": "2024-12-31T01:04:47.605037", - "cpu_percent": 12.7, - "ram_percent": 49.6, - "ram_used_gb": 31.576217651367188, - "gpu_memory_used": 4170.0 + "timestamp": "2024-12-31T03:13:36.780309", + "cpu_percent": 12.5, + "ram_percent": 69.2, + "ram_used_gb": 44.00790786743164, + "gpu_memory_used": 4034.0 }, { - "timestamp": "2024-12-31T01:04:47.675914", - "cpu_percent": 7.5, - "ram_percent": 49.6, - "ram_used_gb": 31.57619857788086, - "gpu_memory_used": 4165.0 + "timestamp": "2024-12-31T03:13:36.853474", + "cpu_percent": 6.2, + "ram_percent": 69.2, + "ram_used_gb": 44.00779724121094, + "gpu_memory_used": 4034.0 }, { - "timestamp": "2024-12-31T01:05:03.055071", - "cpu_percent": 14.3, - "ram_percent": 50.4, - "ram_used_gb": 32.03488540649414, - "gpu_memory_used": 4175.0 + "timestamp": "2024-12-31T03:13:57.449274", + "cpu_percent": 12.4, + "ram_percent": 69.2, + "ram_used_gb": 44.0432243347168, + "gpu_memory_used": 4034.0 }, { - "timestamp": "2024-12-31T01:05:03.129251", - "cpu_percent": 7.6, - "ram_percent": 50.4, - "ram_used_gb": 32.03443908691406, - "gpu_memory_used": 4175.0 + "timestamp": "2024-12-31T03:13:57.524592", + "cpu_percent": 6.2, + "ram_percent": 69.2, + "ram_used_gb": 44.03204345703125, + "gpu_memory_used": 4034.0 }, { - "timestamp": "2024-12-31T01:05:23.090387", + "timestamp": "2024-12-31T03:14:24.698822", "cpu_percent": 13.4, - "ram_percent": 50.5, - "ram_used_gb": 32.15056228637695, - "gpu_memory_used": 4167.0 + "ram_percent": 69.5, + "ram_used_gb": 44.18327331542969, + "gpu_memory_used": 4480.0 }, { - "timestamp": "2024-12-31T01:05:23.171179", - "cpu_percent": 12.2, - "ram_percent": 
50.5, - "ram_used_gb": 32.139862060546875, - "gpu_memory_used": 4167.0 + "timestamp": "2024-12-31T03:14:24.783683", + "cpu_percent": 4.2, + "ram_percent": 69.5, + "ram_used_gb": 44.182212829589844, + "gpu_memory_used": 4480.0 }, { - "timestamp": "2024-12-31T01:05:51.417887", - "cpu_percent": 16.5, - "ram_percent": 49.0, - "ram_used_gb": 31.147480010986328, - "gpu_memory_used": 4574.0 + "timestamp": "2024-12-31T03:14:58.242642", + "cpu_percent": 12.8, + "ram_percent": 69.5, + "ram_used_gb": 44.20225524902344, + "gpu_memory_used": 4476.0 }, { - "timestamp": "2024-12-31T01:05:51.516422", - "cpu_percent": 5.0, - "ram_percent": 49.0, - "ram_used_gb": 31.142948150634766, - "gpu_memory_used": 4574.0 + "timestamp": "2024-12-31T03:14:58.310907", + "cpu_percent": 2.9, + "ram_percent": 69.5, + "ram_used_gb": 44.19659423828125, + "gpu_memory_used": 4476.0 }, { - "timestamp": "2024-12-31T01:06:50.847907", - "cpu_percent": 13.5, - "ram_percent": 51.5, - "ram_used_gb": 32.776729583740234, - "gpu_memory_used": 4585.0 + "timestamp": "2024-12-31T03:15:42.196813", + "cpu_percent": 14.3, + "ram_percent": 69.9, + "ram_used_gb": 44.43781661987305, + "gpu_memory_used": 4494.0 }, { - "timestamp": "2024-12-31T01:06:50.958054", - "cpu_percent": 8.0, - "ram_percent": 51.5, - "ram_used_gb": 32.75238800048828, - "gpu_memory_used": 4585.0 - }, - { - "timestamp": "2024-12-31T01:08:44.351359", + "timestamp": "2024-12-31T03:15:42.288427", "cpu_percent": 13.7, - "ram_percent": 52.2, - "ram_used_gb": 33.209136962890625, - "gpu_memory_used": 4664.0 + "ram_percent": 69.9, + "ram_used_gb": 44.439701080322266, + "gpu_memory_used": 4494.0 + }, + { + "timestamp": "2024-12-31T03:16:35.483849", + "cpu_percent": 14.7, + "ram_percent": 65.0, + "ram_used_gb": 41.35385513305664, + "gpu_memory_used": 4506.0 + }, + { + "timestamp": "2024-12-31T03:16:35.626628", + "cpu_percent": 32.9, + "ram_percent": 65.0, + "ram_used_gb": 41.34442138671875, + "gpu_memory_used": 4506.0 + }, + { + "timestamp": 
"2024-12-31T03:17:29.378353", + "cpu_percent": 13.4, + "ram_percent": 64.3, + "ram_used_gb": 40.8721809387207, + "gpu_memory_used": 4485.0 + }, + { + "timestamp": "2024-12-31T03:17:29.457464", + "cpu_percent": 5.1, + "ram_percent": 64.3, + "ram_used_gb": 40.875389099121094, + "gpu_memory_used": 4485.0 + }, + { + "timestamp": "2024-12-31T03:18:31.955862", + "cpu_percent": 14.3, + "ram_percent": 65.0, + "ram_used_gb": 41.360206604003906, + "gpu_memory_used": 4484.0 + }, + { + "timestamp": "2024-12-31T03:18:32.038999", + "cpu_percent": 12.5, + "ram_percent": 65.0, + "ram_used_gb": 41.37223434448242, + "gpu_memory_used": 4484.0 + }, + { + "timestamp": "2024-12-31T03:19:46.454105", + "cpu_percent": 13.9, + "ram_percent": 65.3, + "ram_used_gb": 41.562198638916016, + "gpu_memory_used": 4487.0 + }, + { + "timestamp": "2024-12-31T03:19:46.524303", + "cpu_percent": 6.8, + "ram_percent": 65.3, + "ram_used_gb": 41.56681442260742, + "gpu_memory_used": 4487.0 + }, + { + "timestamp": "2024-12-31T03:21:25.251452", + "cpu_percent": 23.7, + "ram_percent": 62.0, + "ram_used_gb": 39.456459045410156, + "gpu_memory_used": 4488.0 + }, + { + "timestamp": "2024-12-31T03:21:25.348643", + "cpu_percent": 2.9, + "ram_percent": 62.0, + "ram_used_gb": 39.454288482666016, + "gpu_memory_used": 4487.0 + }, + { + "timestamp": "2024-12-31T03:22:53.939896", + "cpu_percent": 12.9, + "ram_percent": 62.1, + "ram_used_gb": 39.50320053100586, + "gpu_memory_used": 4488.0 + }, + { + "timestamp": "2024-12-31T03:22:54.041607", + "cpu_percent": 8.3, + "ram_percent": 62.1, + "ram_used_gb": 39.49895095825195, + "gpu_memory_used": 4488.0 + }, + { + "timestamp": "2024-12-31T03:24:33.835432", + "cpu_percent": 12.9, + "ram_percent": 62.3, + "ram_used_gb": 39.647212982177734, + "gpu_memory_used": 4503.0 + }, + { + "timestamp": "2024-12-31T03:24:33.923914", + "cpu_percent": 7.6, + "ram_percent": 62.3, + "ram_used_gb": 39.64302062988281, + "gpu_memory_used": 4503.0 + }, + { + "timestamp": "2024-12-31T03:26:22.021598", + 
"cpu_percent": 12.9, + "ram_percent": 58.4, + "ram_used_gb": 37.162540435791016, + "gpu_memory_used": 4491.0 + }, + { + "timestamp": "2024-12-31T03:26:22.142138", + "cpu_percent": 12.0, + "ram_percent": 58.4, + "ram_used_gb": 37.162010192871094, + "gpu_memory_used": 4487.0 + }, + { + "timestamp": "2024-12-31T03:28:15.970365", + "cpu_percent": 15.0, + "ram_percent": 58.2, + "ram_used_gb": 37.04011535644531, + "gpu_memory_used": 4481.0 + }, + { + "timestamp": "2024-12-31T03:28:16.096459", + "cpu_percent": 12.4, + "ram_percent": 58.2, + "ram_used_gb": 37.035972595214844, + "gpu_memory_used": 4473.0 + }, + { + "timestamp": "2024-12-31T03:30:17.092257", + "cpu_percent": 12.4, + "ram_percent": 58.4, + "ram_used_gb": 37.14639663696289, + "gpu_memory_used": 4459.0 } ] } \ No newline at end of file diff --git a/examples/benchmarks/benchmark_stats.txt b/examples/benchmarks/benchmark_stats.txt index 8acc516..c2a9b02 100644 --- a/examples/benchmarks/benchmark_stats.txt +++ b/examples/benchmarks/benchmark_stats.txt @@ -1,19 +1,19 @@ === Benchmark Statistics === Overall Stats: -Total tokens processed: 37750 -Total audio generated: 11724.08s -Total test duration: 270.20s -Average processing rate: 143.76 tokens/second -Average realtime factor: 45.13x +Total tokens processed: 140500 +Total audio generated: 43469.18s +Total test duration: 1061.20s +Average processing rate: 137.67 tokens/second +Average realtime factor: 42.93x Per-chunk Stats: -Average chunk size: 2903.85 tokens +Average chunk size: 5620.00 tokens Min chunk size: 100.00 tokens Max chunk size: 16000.00 tokens -Average processing time: 20.27s -Average output length: 901.85s +Average processing time: 41.13s +Average output length: 1738.77s Performance Ranges: -Processing rate range: 103.56 - 159.43 tokens/second -Realtime factor range: 32.26x - 50.46x +Processing rate range: 11.70 - 155.99 tokens/second +Realtime factor range: 3.65x - 49.46x diff --git a/examples/benchmarks/benchmark_tts.py 
b/examples/benchmarks/benchmark_tts.py index 2e657ce..f17e6ee 100644 --- a/examples/benchmarks/benchmark_tts.py +++ b/examples/benchmarks/benchmark_tts.py @@ -103,7 +103,7 @@ def make_tts_request(text: str, timeout: int = 120) -> tuple[float, float]: response = requests.post( "http://localhost:8880/v1/audio/speech", json={ - "model": "tts-1", + "model": "kokoro", "input": text, "voice": "af", "response_format": "wav", @@ -240,15 +240,15 @@ def main(): print(f"Total tokens in file: {total_tokens}") # Generate token sizes with dense sampling at start and increasing intervals - dense_range = list(range(100, 600, 100)) # 100, 200, 300, 400, 500 - medium_range = [750, 1000, 1500, 2000, 3000] + dense_range = list(range(100, 1001, 100)) + current = max(dense_range) large_range = [] - current = 4000 while current <= total_tokens: large_range.append(current) - current *= 2 + current += 1000 - token_sizes = dense_range + medium_range + large_range + token_sizes = sorted(list(set(dense_range + large_range))) + print(f"Testing sizes: {token_sizes}") # Process chunks results = [] diff --git a/examples/benchmarks/processing_time.png b/examples/benchmarks/processing_time.png index a51e8c2..c66fcaf 100644 Binary files a/examples/benchmarks/processing_time.png and b/examples/benchmarks/processing_time.png differ diff --git a/examples/benchmarks/realtime_factor.png b/examples/benchmarks/realtime_factor.png index e65fb09..249685b 100644 Binary files a/examples/benchmarks/realtime_factor.png and b/examples/benchmarks/realtime_factor.png differ diff --git a/examples/benchmarks/system_usage.png b/examples/benchmarks/system_usage.png index eccfb6d..bc10eb0 100644 Binary files a/examples/benchmarks/system_usage.png and b/examples/benchmarks/system_usage.png differ