diff --git a/Dockerfile b/Dockerfile
index a58ced6..e06d314 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,16 +1,3 @@
-# Stage 1: Clone model repository
-FROM alpine/git:latest AS model_layer
-ARG KOKORO_REPO=https://huggingface.co/hexgrad/Kokoro-82M
-ARG KOKORO_COMMIT=a67f11354c3e38c58c3327498bc4bd1e57e71c50
-
-RUN git lfs install --skip-repo
-WORKDIR /app/Kokoro-82M
-RUN GIT_LFS_SKIP_SMUDGE=1 git clone ${KOKORO_REPO} . && \
- git checkout ${KOKORO_COMMIT} && \
- git lfs pull && \
- ls -la
-
-# Stage 2: Build
FROM nvidia/cuda:12.1.0-base-ubuntu22.04
# Install base system dependencies
@@ -30,28 +17,27 @@ RUN pip3 install --no-cache-dir torch==2.5.1 --extra-index-url https://download.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt
+# Copy application code and model
+COPY . /app/
+
# Set working directory
WORKDIR /app
-
# Run with Python unbuffered output for live logging
ENV PYTHONUNBUFFERED=1
-# Copy model files from git clone stage
-COPY --from=model_layer /app/Kokoro-82M /app/Kokoro-82M
-
# Create non-root user
RUN useradd -m -u 1000 appuser
-# Create and set permissions for output directory
-RUN mkdir -p /app/api/src/output && \
- chown -R appuser:appuser /app/api/src/output
-
-# Set Python path (app first for our imports, then model dir for model imports)
-ENV PYTHONPATH=/app:/app/Kokoro-82M
+# Create directories and set permissions
+RUN mkdir -p /app/Kokoro-82M && \
+ chown -R appuser:appuser /app
# Switch to non-root user
USER appuser
+# Set Python path (app first for our imports, then model dir for model imports)
+ENV PYTHONPATH=/app:/app/Kokoro-82M
+
# Run FastAPI server with debug logging and reload
CMD ["uvicorn", "api.src.main:app", "--host", "0.0.0.0", "--port", "8880", "--log-level", "debug"]
diff --git a/README.md b/README.md
index 35309e9..a40da19 100644
--- a/README.md
+++ b/README.md
@@ -3,81 +3,114 @@
# Kokoro TTS API
-[](https://huggingface.co/hexgrad/Kokoro-82M/tree/a67f11354c3e38c58c3327498bc4bd1e57e71c50)
+[](https://huggingface.co/hexgrad/Kokoro-82M/tree/8228a351f87c8a6076502c1e3b7e72e821ebec9a)
+[]()
+[]()
FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model.
-Dockerized with NVIDIA GPU support, simple queue handling via sqllite, and automatic chunking/stitching on lengthy input/outputs
+OpenAI-compatible API with NVIDIA GPU support, automatic chunking/stitching for long texts, and fast generation (~35-49x realtime factor)
## Quick Start
+1. Install prerequisites:
+ - Install [Docker Desktop](https://www.docker.com/products/docker-desktop/)
+   - Install [Git](https://git-scm.com/downloads) (or download and extract the repository as a zip)
+
+2. Clone and run:
```bash
+# Clone repository
+git clone https://github.com/remsky/Kokoro-FastAPI.git
+cd Kokoro-FastAPI
+
# Start the API (will automatically clone source HF repo via git-lfs)
docker compose up --build
```
-Test it out:
+Test all voices:
```bash
-# From host terminal
-python examples/test_tts.py "Hello world" --voice af_bella
+python examples/test_all_voices.py
+```
+
+Test OpenAI compatibility:
+```bash
+python examples/test_openai_tts.py
+```
+
+## OpenAI-Compatible API
+
+List available voices:
+```python
+import requests
+
+response = requests.get("http://localhost:8880/v1/audio/voices")
+voices = response.json()["voices"]
+```
+
+Generate speech:
+```python
+import requests
+
+response = requests.post(
+ "http://localhost:8000/audio/speech",
+ json={
+ "model": "kokoro", # Not used but required for compatibility
+ "input": "Hello world!",
+ "voice": "af_bella",
+ "response_format": "mp3", # Supported: mp3, wav, opus, flac, aac
+ "speed": 1.0
+ }
+)
+
+# Save audio
+with open("output.mp3", "wb") as f:
+ f.write(response.content)
+```
+
+Using OpenAI's Python library:
+```python
+from openai import OpenAI
+
+client = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed")
+
+response = client.audio.speech.create(
+ model="kokoro", # Not used but required for compatibility, also accepts library defaults
+ voice="af_bella",
+ input="Hello world!",
+ response_format="mp3"
+)
+
+response.stream_to_file("output.mp3")
```
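+
+As a combined example (assuming the endpoints shown above), the following sketch lists the available voices and generates a short sample for each one, similar in spirit to `examples/test_all_voices.py`:
+
+```python
+import requests
+
+# Assumed base URL: port from docker-compose.yml, /v1 prefix as in examples/benchmarks/benchmark_tts.py
+API_URL = "http://localhost:8880/v1"
+
+# Fetch the list of available voices
+voices = requests.get(f"{API_URL}/audio/voices").json()["voices"]
+
+for voice in voices:
+    # Generate a short WAV sample with each voice
+    response = requests.post(
+        f"{API_URL}/audio/speech",
+        json={
+            "model": "kokoro",
+            "input": f"Hello, this is {voice}.",
+            "voice": voice,
+            "response_format": "wav",
+        },
+    )
+    with open(f"sample_{voice}.wav", "wb") as f:
+        f.write(response.content)
+```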
## Performance Benchmarks
-Benchmarking was performed solely on generation via the local API (ignoring file transfers) using various text lengths up to ~10 minutes output, measuring processing time, token count, and output audio length. Tests were run on:
+Benchmarking was performed on generation via the local API using text lengths up to full-length books (~1.5 hours of output audio), measuring processing time and realtime factor. Tests were run on:
- Windows 11 Home w/ WSL2
- NVIDIA 4060Ti 16gb GPU @ CUDA 12.1
- 11th Gen i7-11700 @ 2.5GHz
- 64gb RAM
-- Randomized chunks from H.G. Wells - The Time Machine
+- H.G. Wells - The Time Machine (full text)
-
-
+
+
+Key Performance Metrics:
+- Realtime Factor: Ranges from 35x to 49x (output audio length relative to generation time)
+- Average Processing Rate: 137.67 tokens/second
+- Efficient Scaling: Maintains performance with long texts through automatic chunking
+- Natural Boundary Detection: Automatically splits and stitches at sentence boundaries to prevent audible artifacts (see the illustrative sketch below)
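+
+For intuition, a minimal sketch of the kind of sentence-boundary chunking described above (illustrative only, not the service's actual implementation): sentences are grouped up to a rough token budget, each group is synthesized separately, and the audio segments are stitched back together in order.
+
+```python
+import re
+
+
+def chunk_text(text: str, max_tokens: int = 500) -> list[str]:
+    """Group sentences into chunks that stay under a rough token budget."""
+    # Split on sentence-ending punctuation followed by whitespace
+    sentences = re.split(r"(?<=[.!?])\s+", text.strip())
+    chunks, current, current_len = [], [], 0
+    for sentence in sentences:
+        n_tokens = len(sentence.split())  # word count as a crude token proxy
+        if current and current_len + n_tokens > max_tokens:
+            chunks.append(" ".join(current))
+            current, current_len = [], 0
+        current.append(sentence)
+        current_len += n_tokens
+    if current:
+        chunks.append(" ".join(current))
+    return chunks
+```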
-- Average processing speed: ~3.4 seconds per minute of audio output
-- Efficient token processing: ~0.01 seconds per token
-- Scales well with longer texts, maintains consistent performance
+## Features
-## API Endpoints
-
-```bash
-GET /tts/voices # List available voices
-POST /tts # Generate speech
-GET /tts/{request_id} # Check generation status
-GET /tts/file/{request_id} # Download audio file
-```
-
-## Example Usage
-
-List available voices:
-```bash
-python examples/test_tts.py
-```
-
-Generate speech:
-```bash
-# Default voice
-python examples/test_tts.py "Your text here"
-
-# Specific voice
-python examples/test_tts.py --voice af_bella "Your text here"
-
-# Get file path without downloading
-python examples/test_tts.py --no-download "Your text here"
-```
-
-Generated files are saved in:
-- With download: `examples/output/`
-- Without download: `src/output/` (in API container)
-
-## Requirements
-
-- Docker
-- NVIDIA GPU + CUDA
-- nvidia-container-toolkit installed on host
+- OpenAI-compatible API endpoints
+- Multiple audio formats: mp3, wav, opus, flac, aac
+- Automatic text chunking and audio stitching
+- GPU-accelerated inference
+- Queue handling via SQLite
+- Progress tracking for long generations
## Model
diff --git a/api/src/services/tts.py b/api/src/services/tts.py
index 76b83cc..7224b0e 100644
--- a/api/src/services/tts.py
+++ b/api/src/services/tts.py
@@ -92,7 +92,7 @@ class TTSService:
# Validate phonemization first
ps = phonemize(chunk, voice[0])
tokens = tokenize(ps)
- logger.info(
+ logger.debug(
f"Processing chunk {i+1}/{len(chunks)}: {len(tokens)} tokens"
)
diff --git a/api/src/structures/schemas.py b/api/src/structures/schemas.py
index 5a36e4a..2031e97 100644
--- a/api/src/structures/schemas.py
+++ b/api/src/structures/schemas.py
@@ -26,6 +26,7 @@ class OpenAISpeechRequest(BaseModel):
"bf_emma",
"af_sarah",
"af_bella",
+ "af_nicole",
] = Field(default="af", description="The voice to use for generation")
response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] = Field(
default="mp3",
diff --git a/docker-compose.yml b/docker-compose.yml
index 1d2ae89..aacb121 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,11 +1,33 @@
services:
+ model-fetcher:
+ image: datamachines/git-lfs:latest
+ volumes:
+ - ./Kokoro-82M:/app/Kokoro-82M
+ working_dir: /app/Kokoro-82M
+ command: >
+ sh -c "
+ if [ -z \"$(ls -A .)\" ]; then
+ git clone https://huggingface.co/hexgrad/Kokoro-82M . && \
+ git checkout 8228a351f87c8a6076502c1e3b7e72e821ebec9a;
+ touch .cloned;
+ else
+ touch .cloned;
+ fi;
+ tail -f /dev/null
+ "
+ healthcheck:
+ test: ["CMD", "test", "-f", ".cloned"]
+ interval: 1s
+ timeout: 1s
+ retries: 120
+ start_period: 1s
+
kokoro-tts:
build:
context: .
volumes:
- ./api/src:/app/api/src
- ./Kokoro-82M:/app/Kokoro-82M
- - ./api/src/output:/app/api/src/output
ports:
- "8880:8880"
environment:
@@ -17,3 +39,6 @@ services:
- driver: nvidia
count: 1
capabilities: [gpu]
+ depends_on:
+ model-fetcher:
+ condition: service_healthy
diff --git a/examples/benchmarks/benchmark_results.json b/examples/benchmarks/benchmark_results.json
index c72f896..373cebe 100644
--- a/examples/benchmarks/benchmark_results.json
+++ b/examples/benchmarks/benchmark_results.json
@@ -2,278 +2,530 @@
"results": [
{
"tokens": 100,
- "processing_time": 0.965601921081543,
+ "processing_time": 8.54442310333252,
"output_length": 31.15,
- "realtime_factor": 32.25967069857295,
- "elapsed_time": 1.1258533000946045
+ "realtime_factor": 3.6456527987068887,
+ "elapsed_time": 8.720048666000366
},
{
"tokens": 200,
- "processing_time": 1.2544827461242676,
+ "processing_time": 1.3838517665863037,
"output_length": 62.6,
- "realtime_factor": 49.901045027046486,
- "elapsed_time": 2.52616286277771
+ "realtime_factor": 45.236058883981606,
+ "elapsed_time": 10.258155345916748
},
{
"tokens": 300,
- "processing_time": 2.121187686920166,
- "output_length": 96.475,
- "realtime_factor": 45.48159533213006,
- "elapsed_time": 4.830777883529663
+ "processing_time": 2.2024788856506348,
+ "output_length": 96.325,
+ "realtime_factor": 43.73481200095347,
+ "elapsed_time": 12.594647407531738
},
{
"tokens": 400,
- "processing_time": 2.715940475463867,
- "output_length": 128.675,
- "realtime_factor": 47.3776951897383,
- "elapsed_time": 7.719157934188843
+ "processing_time": 3.175424098968506,
+ "output_length": 128.55,
+ "realtime_factor": 40.48278150995886,
+ "elapsed_time": 16.005898475646973
},
{
"tokens": 500,
- "processing_time": 3.1464896202087402,
- "output_length": 158.775,
- "realtime_factor": 50.46099595569833,
- "elapsed_time": 11.027976274490356
+ "processing_time": 3.205301523208618,
+ "output_length": 158.55,
+ "realtime_factor": 49.46492517224587,
+ "elapsed_time": 19.377076625823975
},
{
- "tokens": 750,
- "processing_time": 5.136769533157349,
- "output_length": 237.25,
- "realtime_factor": 46.18661562847511,
- "elapsed_time": 16.39173412322998
+ "tokens": 600,
+ "processing_time": 3.9976348876953125,
+ "output_length": 189.225,
+ "realtime_factor": 47.33423769700254,
+ "elapsed_time": 23.568575859069824
+ },
+ {
+ "tokens": 700,
+ "processing_time": 4.98036003112793,
+ "output_length": 222.05,
+ "realtime_factor": 44.58513011351734,
+ "elapsed_time": 28.767319917678833
+ },
+ {
+ "tokens": 800,
+ "processing_time": 5.156893491744995,
+ "output_length": 253.825,
+ "realtime_factor": 49.22052402406907,
+ "elapsed_time": 34.1369092464447
+ },
+ {
+ "tokens": 900,
+ "processing_time": 5.8110880851745605,
+ "output_length": 283.75,
+ "realtime_factor": 48.82906537312906,
+ "elapsed_time": 40.16419458389282
},
{
"tokens": 1000,
- "processing_time": 6.8030219078063965,
- "output_length": 315.575,
- "realtime_factor": 46.38747372515161,
- "elapsed_time": 23.391889572143555
- },
- {
- "tokens": 1500,
- "processing_time": 9.774210453033447,
- "output_length": 470.75,
- "realtime_factor": 48.16245795627428,
- "elapsed_time": 33.43451166152954
+ "processing_time": 6.686216354370117,
+ "output_length": 315.45,
+ "realtime_factor": 47.17914935460046,
+ "elapsed_time": 47.11375427246094
},
{
"tokens": 2000,
- "processing_time": 15.099190711975098,
- "output_length": 625.625,
- "realtime_factor": 41.43433988841665,
- "elapsed_time": 48.88647747039795
+ "processing_time": 13.290695905685425,
+ "output_length": 624.925,
+ "realtime_factor": 47.01973504131358,
+ "elapsed_time": 60.842002630233765
},
{
"tokens": 3000,
- "processing_time": 19.526690244674683,
- "output_length": 932.775,
- "realtime_factor": 47.76923217975388,
- "elapsed_time": 68.92684745788574
+ "processing_time": 20.058005571365356,
+ "output_length": 932.05,
+ "realtime_factor": 46.46773063671828,
+ "elapsed_time": 81.50969815254211
},
{
"tokens": 4000,
- "processing_time": 27.7449471950531,
- "output_length": 1224.2,
- "realtime_factor": 44.12334942984767,
- "elapsed_time": 97.2753164768219
+ "processing_time": 26.38338828086853,
+ "output_length": 1222.975,
+ "realtime_factor": 46.353978002394015,
+ "elapsed_time": 108.76348638534546
+ },
+ {
+ "tokens": 5000,
+ "processing_time": 32.472310066223145,
+ "output_length": 1525.15,
+ "realtime_factor": 46.967708699801484,
+ "elapsed_time": 142.2994668483734
+ },
+ {
+ "tokens": 6000,
+ "processing_time": 42.67592263221741,
+ "output_length": 1837.525,
+ "realtime_factor": 43.0576514030137,
+ "elapsed_time": 186.26759266853333
+ },
+ {
+ "tokens": 7000,
+ "processing_time": 51.601537466049194,
+ "output_length": 2146.875,
+ "realtime_factor": 41.60486499869347,
+ "elapsed_time": 239.59922289848328
},
{
"tokens": 8000,
- "processing_time": 58.24176383018494,
- "output_length": 2459.625,
- "realtime_factor": 42.23129311762449,
- "elapsed_time": 156.6953580379486
+ "processing_time": 51.86434292793274,
+ "output_length": 2458.425,
+ "realtime_factor": 47.401063258741466,
+ "elapsed_time": 293.4462616443634
+ },
+ {
+ "tokens": 9000,
+ "processing_time": 60.4497971534729,
+ "output_length": 2772.1,
+ "realtime_factor": 45.857887545297416,
+ "elapsed_time": 356.02399826049805
+ },
+ {
+ "tokens": 10000,
+ "processing_time": 71.75962543487549,
+ "output_length": 3085.625,
+ "realtime_factor": 42.99945800024164,
+ "elapsed_time": 430.50863671302795
+ },
+ {
+ "tokens": 11000,
+ "processing_time": 96.66409230232239,
+ "output_length": 3389.3,
+ "realtime_factor": 35.062657904030935,
+ "elapsed_time": 529.3296246528625
+ },
+ {
+ "tokens": 12000,
+ "processing_time": 85.70126295089722,
+ "output_length": 3703.175,
+ "realtime_factor": 43.21027336693678,
+ "elapsed_time": 618.0248212814331
+ },
+ {
+ "tokens": 13000,
+ "processing_time": 97.2874686717987,
+ "output_length": 4030.825,
+ "realtime_factor": 41.43210893479068,
+ "elapsed_time": 717.9070522785187
+ },
+ {
+ "tokens": 14000,
+ "processing_time": 105.1045708656311,
+ "output_length": 4356.775,
+ "realtime_factor": 41.451812838566596,
+ "elapsed_time": 826.1140224933624
+ },
+ {
+ "tokens": 15000,
+ "processing_time": 111.0716404914856,
+ "output_length": 4663.325,
+ "realtime_factor": 41.984839508672565,
+ "elapsed_time": 940.0645899772644
},
{
"tokens": 16000,
- "processing_time": 110.98782324790955,
- "output_length": 4980.6,
- "realtime_factor": 44.875193100012524,
- "elapsed_time": 270.2029483318329
+ "processing_time": 116.61742973327637,
+ "output_length": 4978.65,
+ "realtime_factor": 42.692160266154104,
+ "elapsed_time": 1061.1957621574402
}
],
"system_metrics": [
{
- "timestamp": "2024-12-31T01:04:14.249314",
- "cpu_percent": 16.3,
- "ram_percent": 49.7,
- "ram_used_gb": 31.605762481689453,
- "gpu_memory_used": 3861.0
+ "timestamp": "2024-12-31T03:12:36.009478",
+ "cpu_percent": 8.1,
+ "ram_percent": 66.8,
+ "ram_used_gb": 42.47850799560547,
+ "gpu_memory_used": 2124.0
},
{
- "timestamp": "2024-12-31T01:04:15.292017",
- "cpu_percent": 16.0,
- "ram_percent": 49.7,
- "ram_used_gb": 31.584003448486328,
- "gpu_memory_used": 3891.0
+ "timestamp": "2024-12-31T03:12:44.639678",
+ "cpu_percent": 7.7,
+ "ram_percent": 69.1,
+ "ram_used_gb": 43.984352111816406,
+ "gpu_memory_used": 3486.0
},
{
- "timestamp": "2024-12-31T01:04:15.365165",
- "cpu_percent": 3.8,
- "ram_percent": 49.6,
- "ram_used_gb": 31.576663970947266,
- "gpu_memory_used": 3891.0
+ "timestamp": "2024-12-31T03:12:44.731107",
+ "cpu_percent": 8.3,
+ "ram_percent": 69.1,
+ "ram_used_gb": 43.97468948364258,
+ "gpu_memory_used": 3484.0
},
{
- "timestamp": "2024-12-31T01:04:16.687140",
- "cpu_percent": 17.7,
- "ram_percent": 49.6,
- "ram_used_gb": 31.56537628173828,
- "gpu_memory_used": 3871.0
+ "timestamp": "2024-12-31T03:12:46.189723",
+ "cpu_percent": 14.2,
+ "ram_percent": 69.1,
+ "ram_used_gb": 43.98275375366211,
+ "gpu_memory_used": 3697.0
},
{
- "timestamp": "2024-12-31T01:04:16.773937",
- "cpu_percent": 5.4,
- "ram_percent": 49.6,
- "ram_used_gb": 31.566661834716797,
- "gpu_memory_used": 3871.0
+ "timestamp": "2024-12-31T03:12:46.265437",
+ "cpu_percent": 4.7,
+ "ram_percent": 69.1,
+ "ram_used_gb": 43.982975006103516,
+ "gpu_memory_used": 3697.0
},
{
- "timestamp": "2024-12-31T01:04:18.989053",
+ "timestamp": "2024-12-31T03:12:48.536216",
+ "cpu_percent": 12.5,
+ "ram_percent": 69.0,
+ "ram_used_gb": 43.86142349243164,
+ "gpu_memory_used": 3697.0
+ },
+ {
+ "timestamp": "2024-12-31T03:12:48.603827",
+ "cpu_percent": 6.2,
+ "ram_percent": 69.0,
+ "ram_used_gb": 43.8692626953125,
+ "gpu_memory_used": 3694.0
+ },
+ {
+ "timestamp": "2024-12-31T03:12:51.905764",
+ "cpu_percent": 14.2,
+ "ram_percent": 69.1,
+ "ram_used_gb": 43.93961715698242,
+ "gpu_memory_used": 3690.0
+ },
+ {
+ "timestamp": "2024-12-31T03:12:52.028178",
+ "cpu_percent": 26.0,
+ "ram_percent": 69.1,
+ "ram_used_gb": 43.944759368896484,
+ "gpu_memory_used": 3690.0
+ },
+ {
+ "timestamp": "2024-12-31T03:12:55.320709",
+ "cpu_percent": 13.2,
+ "ram_percent": 69.1,
+ "ram_used_gb": 43.943058013916016,
+ "gpu_memory_used": 3685.0
+ },
+ {
+ "timestamp": "2024-12-31T03:12:55.386582",
+ "cpu_percent": 3.2,
+ "ram_percent": 69.1,
+ "ram_used_gb": 43.9305419921875,
+ "gpu_memory_used": 3685.0
+ },
+ {
+ "timestamp": "2024-12-31T03:12:59.492304",
"cpu_percent": 15.6,
- "ram_percent": 49.7,
- "ram_used_gb": 31.589736938476562,
- "gpu_memory_used": 3864.0
+ "ram_percent": 69.1,
+ "ram_used_gb": 43.964195251464844,
+ "gpu_memory_used": 4053.0
},
{
- "timestamp": "2024-12-31T01:04:19.072702",
- "cpu_percent": 7.3,
- "ram_percent": 49.7,
- "ram_used_gb": 31.584060668945312,
- "gpu_memory_used": 3855.0
+ "timestamp": "2024-12-31T03:12:59.586143",
+ "cpu_percent": 2.1,
+ "ram_percent": 69.1,
+ "ram_used_gb": 43.9642448425293,
+ "gpu_memory_used": 4053.0
},
{
- "timestamp": "2024-12-31T01:04:21.875793",
- "cpu_percent": 13.3,
- "ram_percent": 49.6,
- "ram_used_gb": 31.573482513427734,
- "gpu_memory_used": 3892.0
+ "timestamp": "2024-12-31T03:13:04.705286",
+ "cpu_percent": 12.0,
+ "ram_percent": 69.2,
+ "ram_used_gb": 43.992374420166016,
+ "gpu_memory_used": 4059.0
},
{
- "timestamp": "2024-12-31T01:04:21.962946",
+ "timestamp": "2024-12-31T03:13:04.779475",
+ "cpu_percent": 4.7,
+ "ram_percent": 69.2,
+ "ram_used_gb": 43.9922981262207,
+ "gpu_memory_used": 4059.0
+ },
+ {
+ "timestamp": "2024-12-31T03:13:10.063292",
+ "cpu_percent": 12.4,
+ "ram_percent": 69.2,
+ "ram_used_gb": 44.004146575927734,
+ "gpu_memory_used": 4041.0
+ },
+ {
+ "timestamp": "2024-12-31T03:13:10.155395",
+ "cpu_percent": 6.8,
+ "ram_percent": 69.2,
+ "ram_used_gb": 44.004215240478516,
+ "gpu_memory_used": 4041.0
+ },
+ {
+ "timestamp": "2024-12-31T03:13:16.097887",
+ "cpu_percent": 13.1,
+ "ram_percent": 69.2,
+ "ram_used_gb": 44.0260009765625,
+ "gpu_memory_used": 4042.0
+ },
+ {
+ "timestamp": "2024-12-31T03:13:16.171478",
"cpu_percent": 4.5,
- "ram_percent": 49.6,
- "ram_used_gb": 31.571670532226562,
- "gpu_memory_used": 3882.0
+ "ram_percent": 69.2,
+ "ram_used_gb": 44.02027130126953,
+ "gpu_memory_used": 4042.0
},
{
- "timestamp": "2024-12-31T01:04:25.196632",
- "cpu_percent": 14.5,
- "ram_percent": 49.7,
- "ram_used_gb": 31.587650299072266,
- "gpu_memory_used": 3877.0
- },
- {
- "timestamp": "2024-12-31T01:04:25.272481",
- "cpu_percent": 5.1,
- "ram_percent": 49.7,
- "ram_used_gb": 31.589813232421875,
- "gpu_memory_used": 3877.0
- },
- {
- "timestamp": "2024-12-31T01:04:30.563097",
- "cpu_percent": 12.8,
- "ram_percent": 49.5,
- "ram_used_gb": 31.491657257080078,
- "gpu_memory_used": 4174.0
- },
- {
- "timestamp": "2024-12-31T01:04:30.631986",
- "cpu_percent": 3.9,
- "ram_percent": 49.5,
- "ram_used_gb": 31.508201599121094,
- "gpu_memory_used": 4174.0
- },
- {
- "timestamp": "2024-12-31T01:04:37.566460",
+ "timestamp": "2024-12-31T03:13:23.044945",
"cpu_percent": 12.6,
- "ram_percent": 49.5,
- "ram_used_gb": 31.482032775878906,
- "gpu_memory_used": 4170.0
+ "ram_percent": 69.2,
+ "ram_used_gb": 44.03746795654297,
+ "gpu_memory_used": 4044.0
},
{
- "timestamp": "2024-12-31T01:04:37.633258",
- "cpu_percent": 5.7,
- "ram_percent": 49.5,
- "ram_used_gb": 31.497997283935547,
- "gpu_memory_used": 4170.0
+ "timestamp": "2024-12-31T03:13:23.127442",
+ "cpu_percent": 8.3,
+ "ram_percent": 69.2,
+ "ram_used_gb": 44.0373420715332,
+ "gpu_memory_used": 4044.0
},
{
- "timestamp": "2024-12-31T01:04:47.605037",
- "cpu_percent": 12.7,
- "ram_percent": 49.6,
- "ram_used_gb": 31.576217651367188,
- "gpu_memory_used": 4170.0
+ "timestamp": "2024-12-31T03:13:36.780309",
+ "cpu_percent": 12.5,
+ "ram_percent": 69.2,
+ "ram_used_gb": 44.00790786743164,
+ "gpu_memory_used": 4034.0
},
{
- "timestamp": "2024-12-31T01:04:47.675914",
- "cpu_percent": 7.5,
- "ram_percent": 49.6,
- "ram_used_gb": 31.57619857788086,
- "gpu_memory_used": 4165.0
+ "timestamp": "2024-12-31T03:13:36.853474",
+ "cpu_percent": 6.2,
+ "ram_percent": 69.2,
+ "ram_used_gb": 44.00779724121094,
+ "gpu_memory_used": 4034.0
},
{
- "timestamp": "2024-12-31T01:05:03.055071",
- "cpu_percent": 14.3,
- "ram_percent": 50.4,
- "ram_used_gb": 32.03488540649414,
- "gpu_memory_used": 4175.0
+ "timestamp": "2024-12-31T03:13:57.449274",
+ "cpu_percent": 12.4,
+ "ram_percent": 69.2,
+ "ram_used_gb": 44.0432243347168,
+ "gpu_memory_used": 4034.0
},
{
- "timestamp": "2024-12-31T01:05:03.129251",
- "cpu_percent": 7.6,
- "ram_percent": 50.4,
- "ram_used_gb": 32.03443908691406,
- "gpu_memory_used": 4175.0
+ "timestamp": "2024-12-31T03:13:57.524592",
+ "cpu_percent": 6.2,
+ "ram_percent": 69.2,
+ "ram_used_gb": 44.03204345703125,
+ "gpu_memory_used": 4034.0
},
{
- "timestamp": "2024-12-31T01:05:23.090387",
+ "timestamp": "2024-12-31T03:14:24.698822",
"cpu_percent": 13.4,
- "ram_percent": 50.5,
- "ram_used_gb": 32.15056228637695,
- "gpu_memory_used": 4167.0
+ "ram_percent": 69.5,
+ "ram_used_gb": 44.18327331542969,
+ "gpu_memory_used": 4480.0
},
{
- "timestamp": "2024-12-31T01:05:23.171179",
- "cpu_percent": 12.2,
- "ram_percent": 50.5,
- "ram_used_gb": 32.139862060546875,
- "gpu_memory_used": 4167.0
+ "timestamp": "2024-12-31T03:14:24.783683",
+ "cpu_percent": 4.2,
+ "ram_percent": 69.5,
+ "ram_used_gb": 44.182212829589844,
+ "gpu_memory_used": 4480.0
},
{
- "timestamp": "2024-12-31T01:05:51.417887",
- "cpu_percent": 16.5,
- "ram_percent": 49.0,
- "ram_used_gb": 31.147480010986328,
- "gpu_memory_used": 4574.0
+ "timestamp": "2024-12-31T03:14:58.242642",
+ "cpu_percent": 12.8,
+ "ram_percent": 69.5,
+ "ram_used_gb": 44.20225524902344,
+ "gpu_memory_used": 4476.0
},
{
- "timestamp": "2024-12-31T01:05:51.516422",
- "cpu_percent": 5.0,
- "ram_percent": 49.0,
- "ram_used_gb": 31.142948150634766,
- "gpu_memory_used": 4574.0
+ "timestamp": "2024-12-31T03:14:58.310907",
+ "cpu_percent": 2.9,
+ "ram_percent": 69.5,
+ "ram_used_gb": 44.19659423828125,
+ "gpu_memory_used": 4476.0
},
{
- "timestamp": "2024-12-31T01:06:50.847907",
- "cpu_percent": 13.5,
- "ram_percent": 51.5,
- "ram_used_gb": 32.776729583740234,
- "gpu_memory_used": 4585.0
+ "timestamp": "2024-12-31T03:15:42.196813",
+ "cpu_percent": 14.3,
+ "ram_percent": 69.9,
+ "ram_used_gb": 44.43781661987305,
+ "gpu_memory_used": 4494.0
},
{
- "timestamp": "2024-12-31T01:06:50.958054",
- "cpu_percent": 8.0,
- "ram_percent": 51.5,
- "ram_used_gb": 32.75238800048828,
- "gpu_memory_used": 4585.0
- },
- {
- "timestamp": "2024-12-31T01:08:44.351359",
+ "timestamp": "2024-12-31T03:15:42.288427",
"cpu_percent": 13.7,
- "ram_percent": 52.2,
- "ram_used_gb": 33.209136962890625,
- "gpu_memory_used": 4664.0
+ "ram_percent": 69.9,
+ "ram_used_gb": 44.439701080322266,
+ "gpu_memory_used": 4494.0
+ },
+ {
+ "timestamp": "2024-12-31T03:16:35.483849",
+ "cpu_percent": 14.7,
+ "ram_percent": 65.0,
+ "ram_used_gb": 41.35385513305664,
+ "gpu_memory_used": 4506.0
+ },
+ {
+ "timestamp": "2024-12-31T03:16:35.626628",
+ "cpu_percent": 32.9,
+ "ram_percent": 65.0,
+ "ram_used_gb": 41.34442138671875,
+ "gpu_memory_used": 4506.0
+ },
+ {
+ "timestamp": "2024-12-31T03:17:29.378353",
+ "cpu_percent": 13.4,
+ "ram_percent": 64.3,
+ "ram_used_gb": 40.8721809387207,
+ "gpu_memory_used": 4485.0
+ },
+ {
+ "timestamp": "2024-12-31T03:17:29.457464",
+ "cpu_percent": 5.1,
+ "ram_percent": 64.3,
+ "ram_used_gb": 40.875389099121094,
+ "gpu_memory_used": 4485.0
+ },
+ {
+ "timestamp": "2024-12-31T03:18:31.955862",
+ "cpu_percent": 14.3,
+ "ram_percent": 65.0,
+ "ram_used_gb": 41.360206604003906,
+ "gpu_memory_used": 4484.0
+ },
+ {
+ "timestamp": "2024-12-31T03:18:32.038999",
+ "cpu_percent": 12.5,
+ "ram_percent": 65.0,
+ "ram_used_gb": 41.37223434448242,
+ "gpu_memory_used": 4484.0
+ },
+ {
+ "timestamp": "2024-12-31T03:19:46.454105",
+ "cpu_percent": 13.9,
+ "ram_percent": 65.3,
+ "ram_used_gb": 41.562198638916016,
+ "gpu_memory_used": 4487.0
+ },
+ {
+ "timestamp": "2024-12-31T03:19:46.524303",
+ "cpu_percent": 6.8,
+ "ram_percent": 65.3,
+ "ram_used_gb": 41.56681442260742,
+ "gpu_memory_used": 4487.0
+ },
+ {
+ "timestamp": "2024-12-31T03:21:25.251452",
+ "cpu_percent": 23.7,
+ "ram_percent": 62.0,
+ "ram_used_gb": 39.456459045410156,
+ "gpu_memory_used": 4488.0
+ },
+ {
+ "timestamp": "2024-12-31T03:21:25.348643",
+ "cpu_percent": 2.9,
+ "ram_percent": 62.0,
+ "ram_used_gb": 39.454288482666016,
+ "gpu_memory_used": 4487.0
+ },
+ {
+ "timestamp": "2024-12-31T03:22:53.939896",
+ "cpu_percent": 12.9,
+ "ram_percent": 62.1,
+ "ram_used_gb": 39.50320053100586,
+ "gpu_memory_used": 4488.0
+ },
+ {
+ "timestamp": "2024-12-31T03:22:54.041607",
+ "cpu_percent": 8.3,
+ "ram_percent": 62.1,
+ "ram_used_gb": 39.49895095825195,
+ "gpu_memory_used": 4488.0
+ },
+ {
+ "timestamp": "2024-12-31T03:24:33.835432",
+ "cpu_percent": 12.9,
+ "ram_percent": 62.3,
+ "ram_used_gb": 39.647212982177734,
+ "gpu_memory_used": 4503.0
+ },
+ {
+ "timestamp": "2024-12-31T03:24:33.923914",
+ "cpu_percent": 7.6,
+ "ram_percent": 62.3,
+ "ram_used_gb": 39.64302062988281,
+ "gpu_memory_used": 4503.0
+ },
+ {
+ "timestamp": "2024-12-31T03:26:22.021598",
+ "cpu_percent": 12.9,
+ "ram_percent": 58.4,
+ "ram_used_gb": 37.162540435791016,
+ "gpu_memory_used": 4491.0
+ },
+ {
+ "timestamp": "2024-12-31T03:26:22.142138",
+ "cpu_percent": 12.0,
+ "ram_percent": 58.4,
+ "ram_used_gb": 37.162010192871094,
+ "gpu_memory_used": 4487.0
+ },
+ {
+ "timestamp": "2024-12-31T03:28:15.970365",
+ "cpu_percent": 15.0,
+ "ram_percent": 58.2,
+ "ram_used_gb": 37.04011535644531,
+ "gpu_memory_used": 4481.0
+ },
+ {
+ "timestamp": "2024-12-31T03:28:16.096459",
+ "cpu_percent": 12.4,
+ "ram_percent": 58.2,
+ "ram_used_gb": 37.035972595214844,
+ "gpu_memory_used": 4473.0
+ },
+ {
+ "timestamp": "2024-12-31T03:30:17.092257",
+ "cpu_percent": 12.4,
+ "ram_percent": 58.4,
+ "ram_used_gb": 37.14639663696289,
+ "gpu_memory_used": 4459.0
}
]
}
\ No newline at end of file
diff --git a/examples/benchmarks/benchmark_stats.txt b/examples/benchmarks/benchmark_stats.txt
index 8acc516..c2a9b02 100644
--- a/examples/benchmarks/benchmark_stats.txt
+++ b/examples/benchmarks/benchmark_stats.txt
@@ -1,19 +1,19 @@
=== Benchmark Statistics ===
Overall Stats:
-Total tokens processed: 37750
-Total audio generated: 11724.08s
-Total test duration: 270.20s
-Average processing rate: 143.76 tokens/second
-Average realtime factor: 45.13x
+Total tokens processed: 140500
+Total audio generated: 43469.18s
+Total test duration: 1061.20s
+Average processing rate: 137.67 tokens/second
+Average realtime factor: 42.93x
Per-chunk Stats:
-Average chunk size: 2903.85 tokens
+Average chunk size: 5620.00 tokens
Min chunk size: 100.00 tokens
Max chunk size: 16000.00 tokens
-Average processing time: 20.27s
-Average output length: 901.85s
+Average processing time: 41.13s
+Average output length: 1738.77s
Performance Ranges:
-Processing rate range: 103.56 - 159.43 tokens/second
-Realtime factor range: 32.26x - 50.46x
+Processing rate range: 11.70 - 155.99 tokens/second
+Realtime factor range: 3.65x - 49.46x
diff --git a/examples/benchmarks/benchmark_tts.py b/examples/benchmarks/benchmark_tts.py
index 2e657ce..f17e6ee 100644
--- a/examples/benchmarks/benchmark_tts.py
+++ b/examples/benchmarks/benchmark_tts.py
@@ -103,7 +103,7 @@ def make_tts_request(text: str, timeout: int = 120) -> tuple[float, float]:
response = requests.post(
"http://localhost:8880/v1/audio/speech",
json={
- "model": "tts-1",
+ "model": "kokoro",
"input": text,
"voice": "af",
"response_format": "wav",
@@ -240,15 +240,15 @@ def main():
print(f"Total tokens in file: {total_tokens}")
# Generate token sizes with dense sampling at start and increasing intervals
- dense_range = list(range(100, 600, 100)) # 100, 200, 300, 400, 500
- medium_range = [750, 1000, 1500, 2000, 3000]
+ dense_range = list(range(100, 1001, 100))
+ current = max(dense_range)
large_range = []
- current = 4000
while current <= total_tokens:
large_range.append(current)
- current *= 2
+ current += 1000
- token_sizes = dense_range + medium_range + large_range
+ token_sizes = sorted(list(set(dense_range + large_range)))
+ print(f"Testing sizes: {token_sizes}")
# Process chunks
results = []
diff --git a/examples/benchmarks/processing_time.png b/examples/benchmarks/processing_time.png
index a51e8c2..c66fcaf 100644
Binary files a/examples/benchmarks/processing_time.png and b/examples/benchmarks/processing_time.png differ
diff --git a/examples/benchmarks/realtime_factor.png b/examples/benchmarks/realtime_factor.png
index e65fb09..249685b 100644
Binary files a/examples/benchmarks/realtime_factor.png and b/examples/benchmarks/realtime_factor.png differ
diff --git a/examples/benchmarks/system_usage.png b/examples/benchmarks/system_usage.png
index eccfb6d..bc10eb0 100644
Binary files a/examples/benchmarks/system_usage.png and b/examples/benchmarks/system_usage.png differ