mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
Update README and tests to clarify audio format support and enhance documentation
This commit is contained in:
parent
36606f7234
commit
607df6e03b
2 changed files with 8 additions and 8 deletions
14
README.md
14
README.md
|
@ -7,9 +7,10 @@
|
|||
[]()
|
||||
[]()
|
||||
|
||||
FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model.
|
||||
|
||||
OpenAI-compatible API with NVIDIA GPU support, with automatic chunking/stitching for long texts, and very fast generation time (~35-49x RTF)
|
||||
FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model, providing an OpenAI-compatible endpoint with:
|
||||
- NVIDIA GPU acceleration enabled
|
||||
- automatic chunking/stitching for long texts
|
||||
- very fast generation time (~35-49x RTF)
|
||||
|
||||
## Quick Start
|
||||
|
||||
|
@ -57,7 +58,7 @@ response = requests.post(
|
|||
"model": "kokoro", # Not used but required for compatibility
|
||||
"input": "Hello world!",
|
||||
"voice": "af_bella",
|
||||
"response_format": "mp3", # Supported: mp3, wav, opus, flac, aac
|
||||
"response_format": "mp3", # Supported: mp3, wav, opus, flac
|
||||
"speed": 1.0
|
||||
}
|
||||
)
|
||||
|
@ -90,6 +91,7 @@ Benchmarking was performed on generation via the local API using text lengths up
|
|||
- NVIDIA 4060Ti 16GB GPU @ CUDA 12.1
|
||||
- 11th Gen i7-11700 @ 2.5GHz
|
||||
- 64GB RAM
|
||||
- WAV native output
|
||||
- H.G. Wells - The Time Machine (full text)
|
||||
|
||||
<p align="center">
|
||||
|
@ -106,11 +108,9 @@ Key Performance Metrics:
|
|||
## Features
|
||||
|
||||
- OpenAI-compatible API endpoints
|
||||
- Multiple audio formats: mp3, wav, opus, flac, aac
|
||||
- Multiple audio formats: mp3, wav, opus, flac (aac & pcm not implemented)
|
||||
- Automatic text chunking and audio stitching
|
||||
- GPU-accelerated inference
|
||||
- Queue handling via SQLite
|
||||
- Progress tracking for long generations
|
||||
|
||||
## Model
|
||||
|
||||
|
|
|
@ -60,7 +60,7 @@ def test_speed(speed: float):
|
|||
|
||||
# Test different formats
|
||||
for format in ["wav", "mp3", "opus", "aac", "flac", "pcm"]:
|
||||
test_format(format)
|
||||
test_format(format) # aac and pcm should fail as they are not supported
|
||||
|
||||
# Test different speeds
|
||||
for speed in [0.25, 1.0, 2.0, 4.0]: # 5.0 should fail as it's out of range
|
||||
|
|
Loading…
Add table
Reference in a new issue