mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
Update README and tests to clarify audio format support and enhance documentation
This commit is contained in:
parent
36606f7234
commit
607df6e03b
2 changed files with 8 additions and 8 deletions
14
README.md
14
README.md
|
@ -7,9 +7,10 @@
|
||||||
[]()
|
[]()
|
||||||
[]()
|
[]()
|
||||||
|
|
||||||
FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model.
|
FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model, providing an OpenAI-compatible endpoint with:
|
||||||
|
- NVIDIA GPU acceleration enabled
|
||||||
OpenAI-compatible API with NVIDIA GPU support, with automatic chunking/stitching for long texts, and very fast generation time (~35-49x RTF)
|
- automatic chunking/stitching for long texts
|
||||||
|
- very fast generation (~35-49x real-time speed)
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
|
@ -57,7 +58,7 @@ response = requests.post(
|
||||||
"model": "kokoro", # Not used but required for compatibility
|
"model": "kokoro", # Not used but required for compatibility
|
||||||
"input": "Hello world!",
|
"input": "Hello world!",
|
||||||
"voice": "af_bella",
|
"voice": "af_bella",
|
||||||
"response_format": "mp3", # Supported: mp3, wav, opus, flac, aac
|
"response_format": "mp3", # Supported: mp3, wav, opus, flac
|
||||||
"speed": 1.0
|
"speed": 1.0
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -90,6 +91,7 @@ Benchmarking was performed on generation via the local API using text lengths up
|
||||||
- NVIDIA 4060Ti 16gb GPU @ CUDA 12.1
|
- NVIDIA 4060Ti 16gb GPU @ CUDA 12.1
|
||||||
- 11th Gen i7-11700 @ 2.5GHz
|
- 11th Gen i7-11700 @ 2.5GHz
|
||||||
- 64gb RAM
|
- 64gb RAM
|
||||||
|
- WAV native output
|
||||||
- H.G. Wells - The Time Machine (full text)
|
- H.G. Wells - The Time Machine (full text)
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
|
@ -106,11 +108,9 @@ Key Performance Metrics:
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- OpenAI-compatible API endpoints
|
- OpenAI-compatible API endpoints
|
||||||
- Multiple audio formats: mp3, wav, opus, flac, aac
|
- Multiple audio formats: mp3, wav, opus, flac (aac and pcm are not yet implemented)
|
||||||
- Automatic text chunking and audio stitching
|
- Automatic text chunking and audio stitching
|
||||||
- GPU-accelerated inference
|
- GPU-accelerated inference
|
||||||
- Queue handling via SQLite
|
|
||||||
- Progress tracking for long generations
|
|
||||||
|
|
||||||
## Model
|
## Model
|
||||||
|
|
||||||
|
|
|
@ -60,7 +60,7 @@ def test_speed(speed: float):
|
||||||
|
|
||||||
# Test different formats
|
# Test different formats
|
||||||
for format in ["wav", "mp3", "opus", "aac", "flac", "pcm"]:
|
for format in ["wav", "mp3", "opus", "aac", "flac", "pcm"]:
|
||||||
test_format(format)
|
test_format(format) # aac and pcm should fail as they are not supported
|
||||||
|
|
||||||
# Test different speeds
|
# Test different speeds
|
||||||
for speed in [0.25, 1.0, 2.0, 4.0]: # 5.0 should fail as it's out of range
|
for speed in [0.25, 1.0, 2.0, 4.0]: # 5.0 should fail as it's out of range
|
||||||
|
|
Loading…
Add table
Reference in a new issue