From 607df6e03bb196d0a1c036a4d77c033506d45c92 Mon Sep 17 00:00:00 2001
From: remsky <jeremy.braun@ucalgary.ca>
Date: Tue, 31 Dec 2024 03:46:31 -0700
Subject: [PATCH] Update README and tests to clarify audio format support and
 enhance documentation

---
 README.md                   | 14 +++++++-------
 examples/test_openai_tts.py |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/README.md b/README.md
index a40da19..6022932 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,10 @@
 [![Tests](https://img.shields.io/badge/tests-33%20passed-darkgreen)]()
 [![Coverage](https://img.shields.io/badge/coverage-97%25-darkgreen)]()
 
-FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model. 
-
-OpenAI-compatible API with NVIDIA GPU support, with automatic chunking/stitching for long texts, and very fast generation time (~35-49x RTF)
+FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model, providing an OpenAI-compatible endpoint with:
+- NVIDIA GPU acceleration
+- automatic chunking/stitching for long texts
+- very fast generation time (~35-49x RTF)
 
 ## Quick Start
 
@@ -57,7 +58,7 @@ response = requests.post(
         "model": "kokoro",  # Not used but required for compatibility
         "input": "Hello world!",
         "voice": "af_bella",
-        "response_format": "mp3",  # Supported: mp3, wav, opus, flac, aac
+        "response_format": "mp3",  # Supported: mp3, wav, opus, flac
         "speed": 1.0
     }
 )
@@ -90,6 +91,7 @@ Benchmarking was performed on generation via the local API using text lengths up
 - NVIDIA 4060Ti 16gb GPU @ CUDA 12.1
 - 11th Gen i7-11700 @ 2.5GHz
 - 64gb RAM
+- WAV native output
 - H.G. Wells - The Time Machine (full text)
 
 <p align="center">
@@ -106,11 +108,9 @@ Key Performance Metrics:
 ## Features
 
 - OpenAI-compatible API endpoints
-- Multiple audio formats: mp3, wav, opus, flac, aac
+- Multiple audio formats: mp3, wav, opus, flac (aac & pcm not implemented)
 - Automatic text chunking and audio stitching
 - GPU-accelerated inference
-- Queue handling via SQLite
-- Progress tracking for long generations
 
 ## Model
 
diff --git a/examples/test_openai_tts.py b/examples/test_openai_tts.py
index 932aa11..7cc8104 100644
--- a/examples/test_openai_tts.py
+++ b/examples/test_openai_tts.py
@@ -60,7 +60,7 @@ def test_speed(speed: float):
 
 # Test different formats
 for format in ["wav", "mp3", "opus", "aac", "flac", "pcm"]:
-    test_format(format)
+    test_format(format) # aac and pcm should fail as they are not supported
 
 # Test different speeds
 for speed in [0.25, 1.0, 2.0, 4.0]:  # 5.0 should fail as it's out of range