From 1cf011b2ebf50d7f37a281cc2ed056107a4e45db Mon Sep 17 00:00:00 2001
From: Krurst <60066261+Krurst@users.noreply.github.com>
Date: Tue, 11 Feb 2025 23:35:51 +0800
Subject: [PATCH 1/9] Update openai_compatible.py to fix lang_code

Properly set lang_code from the API request; when it is not set, fall back to
settings.default_voice_code, then to the lowercased first letter of the voice name.
---
 api/src/routers/openai_compatible.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py
index 5508d65..106cdd2 100644
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@@ -137,7 +137,7 @@ async def stream_audio_chunks(
             voice=voice_name,
             speed=request.speed,
             output_format=request.response_format,
-            lang_code=request.lang_code or request.voice[0],
+            lang_code=request.lang_code if request.lang_code else (settings.default_voice_code if settings.default_voice_code else voice_name[0].lower()),
         ):
             # Check if client is still connected
             is_disconnected = client_request.is_disconnected

From b6dd9f326ba6da22b2cac09477074e340ecba67a Mon Sep 17 00:00:00 2001
From: Ikko Eltociear Ashimine <eltociear@gmail.com>
Date: Wed, 12 Feb 2025 02:11:08 +0900
Subject: [PATCH 2/9] docs: update README.md

accomodate -> accommodate
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4c8372c..18ee2b0 100644
--- a/README.md
+++ b/README.md
@@ -351,7 +351,7 @@ cd docker/cpu
 docker compose up --build
 
 ```
-*Note: Overall speed may have reduced somewhat with the structural changes to accomodate streaming. Looking into it* 
+*Note: Overall speed may have reduced somewhat with the structural changes to accommodate streaming. Looking into it* 
 </details>
 
 <details>

From 1e14fd8724f71d63c1c1b8e9fb7ac5b1c97527b2 Mon Sep 17 00:00:00 2001
From: zucher <zucher@free.fr>
Date: Tue, 11 Feb 2025 21:02:58 +0000
Subject: [PATCH 3/9] Fix chart ingress issue

---
 charts/kokoro-fastapi/values.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/charts/kokoro-fastapi/values.yaml b/charts/kokoro-fastapi/values.yaml
index 05419d9..0db2f95 100644
--- a/charts/kokoro-fastapi/values.yaml
+++ b/charts/kokoro-fastapi/values.yaml
@@ -45,8 +45,8 @@ ingress:
   host:
     name: kokoro.example.com
     endpoints:
-      backend:
-        path: "/"
+      - paths:
+          - "/"
         serviceName: "fastapi"
         servicePort: 8880
 

From f5851854042c7aff54e4e75c77ce824ca52c64f5 Mon Sep 17 00:00:00 2001
From: remsky <jeremy.braun@ucalgary.ca>
Date: Wed, 12 Feb 2025 23:31:47 -0700
Subject: [PATCH 4/9] Update openai_compatible.py

---
 api/src/routers/openai_compatible.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py
index 2dc0ac2..931c4cd 100644
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@@ -138,7 +138,7 @@ async def stream_audio_chunks(
             voice=voice_name,
             speed=request.speed,
             output_format=request.response_format,
-            lang_code = request.lang_code or settings.default_voice_code or voice_name[0].lower(),
+            lang_code=request.lang_code or settings.default_voice_code or voice_name[0].lower(),
             normalization_options=request.normalization_options
         ):
             # Check if client is still connected

From 37ea01eaf92e46aba1178813b4a4d7824c893bba Mon Sep 17 00:00:00 2001
From: remsky <jeremy.braun@ucalgary.ca>
Date: Thu, 13 Feb 2025 00:04:21 -0700
Subject: [PATCH 5/9] fix: download_format option for audio response, handling
 in create_speech

---
 api/src/routers/openai_compatible.py | 6 ++++--
 api/src/structures/schemas.py        | 4 ++++
 web/src/services/AudioService.js     | 3 ++-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py
index 931c4cd..327ccdd 100644
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@@ -197,7 +197,9 @@ async def create_speech(
             if request.return_download_link:
                 from ..services.temp_manager import TempFileWriter
 
-                temp_writer = TempFileWriter(request.response_format)
+                # Use download_format if specified, otherwise use response_format
+                output_format = request.download_format or request.response_format
+                temp_writer = TempFileWriter(output_format)
                 await temp_writer.__aenter__()  # Initialize temp file
 
                 # Get download path immediately after temp file creation
@@ -205,7 +207,7 @@ async def create_speech(
 
                 # Create response headers with download path
                 headers = {
-                    "Content-Disposition": f"attachment; filename=speech.{request.response_format}",
+                    "Content-Disposition": f"attachment; filename=speech.{output_format}",
                     "X-Accel-Buffering": "no",
                     "Cache-Control": "no-cache",
                     "Transfer-Encoding": "chunked",
diff --git a/api/src/structures/schemas.py b/api/src/structures/schemas.py
index 7a6484c..e233d61 100644
--- a/api/src/structures/schemas.py
+++ b/api/src/structures/schemas.py
@@ -60,6 +60,10 @@ class OpenAISpeechRequest(BaseModel):
         default="mp3",
         description="The format to return audio in. Supported formats: mp3, opus, flac, wav, pcm. PCM format returns raw 16-bit samples without headers. AAC is not currently supported.",
     )
+    download_format: Optional[Literal["mp3", "opus", "aac", "flac", "wav", "pcm"]] = Field(
+        default=None,
+        description="Optional different format for the final download. If not provided, uses response_format.",
+    )
     speed: float = Field(
         default=1.0,
         ge=0.25,
diff --git a/web/src/services/AudioService.js b/web/src/services/AudioService.js
index 44d0727..cee33d4 100644
--- a/web/src/services/AudioService.js
+++ b/web/src/services/AudioService.js
@@ -39,7 +39,8 @@ export class AudioService {
                 body: JSON.stringify({
                     input: text,
                     voice: voice,
-                    response_format: 'mp3',
+                    response_format: 'mp3', // Always use mp3 for streaming playback
+                    download_format: document.getElementById('format-select').value || 'mp3', // Format for final download
                     stream: true,
                     speed: speed,
                     return_download_link: true,

From cfae7db7fc10bf2123bd01af462b127b5e76ee5c Mon Sep 17 00:00:00 2001
From: remsky <jeremy.braun@ucalgary.ca>
Date: Thu, 13 Feb 2025 00:22:14 -0700
Subject: [PATCH 6/9] fix: bump up audio quality settings in
 StreamingAudioWriter

---
 api/src/services/streaming_audio_writer.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/api/src/services/streaming_audio_writer.py b/api/src/services/streaming_audio_writer.py
index 6c31e47..1a45eec 100644
--- a/api/src/services/streaming_audio_writer.py
+++ b/api/src/services/streaming_audio_writer.py
@@ -112,7 +112,7 @@ class StreamingAudioWriter:
                         parameters.extend(
                             [
                                 "-q:a",
-                                "2",
+                                "0",  # Highest quality
                                 "-write_xing",
                                 "1",  # XING header for MP3
                                 "-id3v1",
@@ -142,7 +142,7 @@ class StreamingAudioWriter:
                     self.encoder.export(
                         output_buffer,
                         **format_args,
-                        bitrate="192k",
+                        bitrate="192k",  # Optimal for 24kHz/16-bit mono source
                         parameters=parameters,
                     )
                     self.encoder = None
@@ -189,10 +189,10 @@ class StreamingAudioWriter:
             self.encoder.export(
                 output_buffer,
                 **format_args,
-                bitrate="192k",
+                bitrate="192k",  # Optimal for 24kHz/16-bit mono source
                 parameters=[
                     "-q:a",
-                    "2",
+                    "0",  # Highest quality for chunks too
                     "-write_xing",
                     "0",  # No XING headers for chunks
                 ],

From 97f82c0685cc5ee24deec0a722a65d00e6e384be Mon Sep 17 00:00:00 2001
From: remsky <jeremy.braun@ucalgary.ca>
Date: Thu, 13 Feb 2025 03:11:11 -0700
Subject: [PATCH 7/9] Update README.md

---
 README.md | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 18ee2b0..cabc5c7 100644
--- a/README.md
+++ b/README.md
@@ -12,15 +12,20 @@
 
 [![Tested at Model Commit](https://img.shields.io/badge/last--tested--model--commit-1.0::9901c2b-blue)](https://huggingface.co/hexgrad/Kokoro-82M/commit/9901c2b79161b6e898b7ea857ae5298f47b8b0d6)
 
-
 Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model
 - Multi-language support (English, Japanese, Korean, Chinese, Vietnamese)
 - OpenAI-compatible Speech endpoint, NVIDIA GPU accelerated or CPU inference with PyTorch 
 - ONNX support coming soon, see v0.1.5 and earlier for legacy ONNX support in the interim
 - Debug endpoints for monitoring system stats, integrated web UI on localhost:8880/web
 - Phoneme-based audio generation, phoneme generation
-- (new) Per-word timestamped caption generation
-- (new) Voice mixing with weighted combinations
+- Per-word timestamped caption generation
+- Voice mixing with weighted combinations
+
+### Integration Guides
+[![SillyTavern](https://img.shields.io/badge/SillyTavern-black?style=flat&color=red)](https://github.com/remsky/Kokoro-FastAPI/wiki/Integrations-SillyTavern)
+[![OpenWebUI](https://img.shields.io/badge/OpenWebUI-black?style=flat&color=white)](https://github.com/remsky/Kokoro-FastAPI/wiki/Integrations-OpenWebUi) [![Helm Chart](https://img.shields.io/badge/Helm%20Chart-black?style=flat&logo=helm&logoColor=white)](https://github.com/remsky/Kokoro-FastAPI/wiki/Setup-Kubernetes)
+
+
 
 
 ## Get Started
@@ -36,8 +41,8 @@ Refer to the core/config.py file for a full list of variables which can be manag
 # the `latest` tag can be used, but should not be considered stable as it may include `nightly` branch builds
 # it may have some bonus features however, and feedback/testing is welcome
 
-docker run -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-cpu:v0.2.1 # CPU, or:
-docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-gpu:v0.2.1  #NVIDIA GPU
+docker run -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-cpu:v0.2.2 # CPU, or:
+docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-gpu:v0.2.2  #NVIDIA GPU
 ```
 
 
@@ -121,6 +126,7 @@ with client.audio.speech.with_streaming_response.create(
 
 </details>
 
+
 ## Features 
 <details>
 <summary>OpenAI-Compatible Speech Endpoint</summary>

From f587309d8f23ef01d3476281a165fc65de374f96 Mon Sep 17 00:00:00 2001
From: remsky <jeremy.braun@ucalgary.ca>
Date: Thu, 13 Feb 2025 03:12:45 -0700
Subject: [PATCH 8/9] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cabc5c7..8428fee 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
 [![Tested at Model Commit](https://img.shields.io/badge/last--tested--model--commit-1.0::9901c2b-blue)](https://huggingface.co/hexgrad/Kokoro-82M/commit/9901c2b79161b6e898b7ea857ae5298f47b8b0d6)
 
 Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model
-- Multi-language support (English, Japanese, Korean, Chinese, Vietnamese)
+- Multi-language support (English, Japanese, Korean, Chinese, _Vietnamese soon_)
 - OpenAI-compatible Speech endpoint, NVIDIA GPU accelerated or CPU inference with PyTorch 
 - ONNX support coming soon, see v0.1.5 and earlier for legacy ONNX support in the interim
 - Debug endpoints for monitoring system stats, integrated web UI on localhost:8880/web

From b00c9ec28df0fd551ae25108a986e04d29a54f2e Mon Sep 17 00:00:00 2001
From: remsky <jeremy.braun@ucalgary.ca>
Date: Thu, 13 Feb 2025 20:38:45 -0700
Subject: [PATCH 9/9] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 8428fee..04a235c 100644
--- a/README.md
+++ b/README.md
@@ -22,8 +22,8 @@ Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokor
 - Voice mixing with weighted combinations
 
 ### Integration Guides
-[![SillyTavern](https://img.shields.io/badge/SillyTavern-black?style=flat&color=red)](https://github.com/remsky/Kokoro-FastAPI/wiki/Integrations-SillyTavern)
-[![OpenWebUI](https://img.shields.io/badge/OpenWebUI-black?style=flat&color=white)](https://github.com/remsky/Kokoro-FastAPI/wiki/Integrations-OpenWebUi) [![Helm Chart](https://img.shields.io/badge/Helm%20Chart-black?style=flat&logo=helm&logoColor=white)](https://github.com/remsky/Kokoro-FastAPI/wiki/Setup-Kubernetes)
+ [![Helm Chart](https://img.shields.io/badge/Helm%20Chart-black?style=flat&logo=helm&logoColor=white)](https://github.com/remsky/Kokoro-FastAPI/wiki/Setup-Kubernetes) [![DigitalOcean](https://img.shields.io/badge/DigitalOcean-black?style=flat&logo=digitalocean&logoColor=white)](https://github.com/remsky/Kokoro-FastAPI/wiki/Integrations-DigitalOcean) [![SillyTavern](https://img.shields.io/badge/SillyTavern-black?style=flat&color=red)](https://github.com/remsky/Kokoro-FastAPI/wiki/Integrations-SillyTavern)
+[![OpenWebUI](https://img.shields.io/badge/OpenWebUI-black?style=flat&color=white)](https://github.com/remsky/Kokoro-FastAPI/wiki/Integrations-OpenWebUi)