mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-08-05 16:48:53 +00:00
ci: enhance local saving feature, update voice selection to support multiple voices, and improve output filename generation
This commit is contained in:
parent
9edc7fd7fc
commit
5cc3bacac1
14 changed files with 266 additions and 157 deletions
|
@ -7,6 +7,7 @@ omit =
|
||||||
MagicMock/*
|
MagicMock/*
|
||||||
test_*.py
|
test_*.py
|
||||||
examples/*
|
examples/*
|
||||||
|
src/builds/*
|
||||||
|
|
||||||
[report]
|
[report]
|
||||||
exclude_lines =
|
exclude_lines =
|
||||||
|
|
30
README.md
30
README.md
|
@ -2,9 +2,9 @@
|
||||||
<img src="githubbanner.png" alt="Kokoro TTS Banner">
|
<img src="githubbanner.png" alt="Kokoro TTS Banner">
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
# Kokoro TTS API
|
# <sub><sub>_`FastKoko`_ </sub></sub>
|
||||||
[]()
|
[]()
|
||||||
[]()
|
[]()
|
||||||
[](https://huggingface.co/hexgrad/Kokoro-82M/tree/c3b0d86e2a980e027ef71c28819ea02e351c2667) [](https://huggingface.co/spaces/Remsky/Kokoro-TTS-Zero) [](https://www.buymeacoffee.com/remsky)
|
[](https://huggingface.co/hexgrad/Kokoro-82M/tree/c3b0d86e2a980e027ef71c28819ea02e351c2667) [](https://huggingface.co/spaces/Remsky/Kokoro-TTS-Zero) [](https://www.buymeacoffee.com/remsky)
|
||||||
|
|
||||||
Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model
|
Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model
|
||||||
|
@ -35,8 +35,9 @@ The service can be accessed through either the API endpoints or the Gradio web i
|
||||||
|
|
||||||
- Using Docker Compose (Full setup including UI):
|
- Using Docker Compose (Full setup including UI):
|
||||||
```bash
|
```bash
|
||||||
docker compose up --build # for GPU
|
cd docker/gpu # OR
|
||||||
docker compose -f docker-compose.cpu.yml up --build # for CPU
|
# cd docker/cpu # Run this or the above
|
||||||
|
docker compose up --build
|
||||||
```
|
```
|
||||||
- OR running the API alone using Docker (model + voice packs baked in):
|
- OR running the API alone using Docker (model + voice packs baked in):
|
||||||
```bash
|
```bash
|
||||||
|
@ -350,6 +351,27 @@ See `examples/phoneme_examples/generate_phonemes.py` for a sample script.
|
||||||
|
|
||||||
## Known Issues
|
## Known Issues
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>Versioning & Development</summary>
|
||||||
|
|
||||||
|
I'm doing what I can to keep things stable, but this project is still in an early phase of rapid build cycles.
|
||||||
|
If you run into trouble, you may need to roll back to an earlier version from the release tags, or build from source and/or troubleshoot and submit a PR. I will leave the branch up here for the last known stable points:
|
||||||
|
|
||||||
|
`v0.0.5post1`
|
||||||
|
|
||||||
|
Free and open source software is a community effort, and I love working on this project, though there are only so many hours in a day. If you'd like to support the work, feel free to open a PR, buy me a coffee, or report any bugs, feature requests, etc. that you come across during use.
|
||||||
|
|
||||||
|
<a href="https://www.buymeacoffee.com/remsky" target="_blank">
|
||||||
|
<img
|
||||||
|
src="https://cdn.buymeacoffee.com/buttons/v2/default-violet.png"
|
||||||
|
alt="Buy Me A Coffee"
|
||||||
|
style="height: 30px !important;width: 110px !important;"
|
||||||
|
>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary>Linux GPU Permissions</summary>
|
<summary>Linux GPU Permissions</summary>
|
||||||
|
|
||||||
|
|
|
@ -35,3 +35,4 @@ services:
|
||||||
environment:
|
environment:
|
||||||
- GRADIO_WATCH=True # Enable hot reloading
|
- GRADIO_WATCH=True # Enable hot reloading
|
||||||
- PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
|
- PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
|
||||||
|
- DISABLE_LOCAL_SAVING=false # Set to 'true' to disable local saving and hide file view
|
|
@ -32,4 +32,4 @@ services:
|
||||||
environment:
|
environment:
|
||||||
- GRADIO_WATCH=1 # Enable hot reloading
|
- GRADIO_WATCH=1 # Enable hot reloading
|
||||||
- PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
|
- PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
|
||||||
- DISABLE_LOCAL_SAVING=true # Set to 'true' to disable local saving and hide file view
|
- DISABLE_LOCAL_SAVING=false # Set to 'true' to disable local saving and hide file view
|
||||||
|
|
|
@ -36,15 +36,18 @@ def check_api_status() -> Tuple[bool, List[str]]:
|
||||||
|
|
||||||
|
|
||||||
def text_to_speech(
|
def text_to_speech(
|
||||||
text: str, voice_id: str, format: str, speed: float
|
text: str, voice_id: str | list, format: str, speed: float
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""Generate speech from text using TTS API."""
|
"""Generate speech from text using TTS API."""
|
||||||
if not text.strip():
|
if not text.strip():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Handle multiple voices
|
||||||
|
voice_str = voice_id if isinstance(voice_id, str) else "+".join(voice_id)
|
||||||
|
|
||||||
# Create output filename
|
# Create output filename
|
||||||
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||||
output_filename = f"output_{timestamp}_voice-{voice_id}_speed-{speed}.{format}"
|
output_filename = f"output_{timestamp}_voice-{voice_str}_speed-{speed}.{format}"
|
||||||
output_path = os.path.join(OUTPUTS_DIR, output_filename)
|
output_path = os.path.join(OUTPUTS_DIR, output_filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -53,7 +56,7 @@ def text_to_speech(
|
||||||
json={
|
json={
|
||||||
"model": "kokoro",
|
"model": "kokoro",
|
||||||
"input": text,
|
"input": text,
|
||||||
"voice": voice_id,
|
"voice": voice_str,
|
||||||
"response_format": format,
|
"response_format": format,
|
||||||
"speed": float(speed),
|
"speed": float(speed),
|
||||||
},
|
},
|
||||||
|
|
|
@ -5,54 +5,78 @@ import gradio as gr
|
||||||
from .. import files
|
from .. import files
|
||||||
|
|
||||||
|
|
||||||
def create_input_column() -> Tuple[gr.Column, dict]:
|
def create_input_column(disable_local_saving: bool = False) -> Tuple[gr.Column, dict]:
|
||||||
"""Create the input column with text input and file handling."""
|
"""Create the input column with text input and file handling."""
|
||||||
with gr.Column(scale=1) as col:
|
with gr.Column(scale=1) as col:
|
||||||
with gr.Tabs() as tabs:
|
text_input = gr.Textbox(
|
||||||
# Set first tab as selected by default
|
label="Text to speak", placeholder="Enter text here...", lines=4
|
||||||
tabs.selected = 0
|
)
|
||||||
# Direct Input Tab
|
|
||||||
with gr.TabItem("Direct Input"):
|
# Always show file upload but handle differently based on disable_local_saving
|
||||||
text_input = gr.Textbox(
|
file_upload = gr.File(
|
||||||
label="Text to speak", placeholder="Enter text here...", lines=4
|
label="Upload Text File (.txt)", file_types=[".txt"]
|
||||||
)
|
)
|
||||||
text_submit = gr.Button("Generate Speech", variant="primary", size="lg")
|
|
||||||
|
if not disable_local_saving:
|
||||||
|
# Show full interface with tabs when saving is enabled
|
||||||
|
with gr.Tabs() as tabs:
|
||||||
|
# Set first tab as selected by default
|
||||||
|
tabs.selected = 0
|
||||||
|
# Direct Input Tab
|
||||||
|
with gr.TabItem("Direct Input"):
|
||||||
|
text_submit_direct = gr.Button("Generate Speech", variant="primary", size="lg")
|
||||||
|
|
||||||
# File Input Tab
|
# File Input Tab
|
||||||
with gr.TabItem("From File"):
|
with gr.TabItem("From File"):
|
||||||
# Existing files dropdown
|
# Existing files dropdown
|
||||||
input_files_list = gr.Dropdown(
|
input_files_list = gr.Dropdown(
|
||||||
label="Select Existing File",
|
label="Select Existing File",
|
||||||
choices=files.list_input_files(),
|
choices=files.list_input_files(),
|
||||||
value=None,
|
value=None,
|
||||||
)
|
|
||||||
|
|
||||||
# Simple file upload
|
|
||||||
file_upload = gr.File(
|
|
||||||
label="Upload Text File (.txt)", file_types=[".txt"]
|
|
||||||
)
|
|
||||||
|
|
||||||
file_preview = gr.Textbox(
|
|
||||||
label="File Content Preview", interactive=False, lines=4
|
|
||||||
)
|
|
||||||
|
|
||||||
with gr.Row():
|
|
||||||
file_submit = gr.Button(
|
|
||||||
"Generate Speech", variant="primary", size="lg"
|
|
||||||
)
|
|
||||||
clear_files = gr.Button(
|
|
||||||
"Clear Files", variant="secondary", size="lg"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
components = {
|
file_preview = gr.Textbox(
|
||||||
"tabs": tabs,
|
label="File Content Preview", interactive=False, lines=4
|
||||||
"text_input": text_input,
|
)
|
||||||
"file_select": input_files_list,
|
|
||||||
"file_upload": file_upload,
|
with gr.Row():
|
||||||
"file_preview": file_preview,
|
file_submit = gr.Button(
|
||||||
"text_submit": text_submit,
|
"Generate Speech", variant="primary", size="lg"
|
||||||
"file_submit": file_submit,
|
)
|
||||||
"clear_files": clear_files,
|
clear_files = gr.Button(
|
||||||
}
|
"Clear Files", variant="secondary", size="lg"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Just show the generate button when saving is disabled
|
||||||
|
text_submit_direct = gr.Button("Generate Speech", variant="primary", size="lg")
|
||||||
|
tabs = None
|
||||||
|
input_files_list = None
|
||||||
|
file_preview = None
|
||||||
|
file_submit = None
|
||||||
|
clear_files = None
|
||||||
|
|
||||||
|
# Initialize components based on disable_local_saving
|
||||||
|
if disable_local_saving:
|
||||||
|
components = {
|
||||||
|
"tabs": None,
|
||||||
|
"text_input": text_input,
|
||||||
|
"text_submit": text_submit_direct,
|
||||||
|
"file_select": None,
|
||||||
|
"file_upload": file_upload, # Keep file upload even when saving is disabled
|
||||||
|
"file_preview": None,
|
||||||
|
"file_submit": None,
|
||||||
|
"clear_files": None,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
components = {
|
||||||
|
"tabs": tabs,
|
||||||
|
"text_input": text_input,
|
||||||
|
"text_submit": text_submit_direct,
|
||||||
|
"file_select": input_files_list,
|
||||||
|
"file_upload": file_upload,
|
||||||
|
"file_preview": file_preview,
|
||||||
|
"file_submit": file_submit,
|
||||||
|
"clear_files": clear_files,
|
||||||
|
}
|
||||||
|
|
||||||
return col, components
|
return col, components
|
||||||
|
|
|
@ -20,9 +20,10 @@ def create_model_column(voice_ids: Optional[list] = None) -> Tuple[gr.Column, di
|
||||||
|
|
||||||
voice_input = gr.Dropdown(
|
voice_input = gr.Dropdown(
|
||||||
choices=voice_ids,
|
choices=voice_ids,
|
||||||
label="Voice",
|
label="Voice(s)",
|
||||||
value=voice_ids[0] if voice_ids else None,
|
value=voice_ids[0] if voice_ids else None,
|
||||||
interactive=True,
|
interactive=True,
|
||||||
|
multiselect=True,
|
||||||
)
|
)
|
||||||
format_input = gr.Dropdown(
|
format_input = gr.Dropdown(
|
||||||
choices=config.AUDIO_FORMATS, label="Audio Format", value="mp3"
|
choices=config.AUDIO_FORMATS, label="Audio Format", value="mp3"
|
||||||
|
|
|
@ -9,7 +9,11 @@ def create_output_column(disable_local_saving: bool = False) -> Tuple[gr.Column,
|
||||||
"""Create the output column with audio player and file list."""
|
"""Create the output column with audio player and file list."""
|
||||||
with gr.Column(scale=1) as col:
|
with gr.Column(scale=1) as col:
|
||||||
gr.Markdown("### Latest Output")
|
gr.Markdown("### Latest Output")
|
||||||
audio_output = gr.Audio(label="Generated Speech", type="filepath")
|
audio_output = gr.Audio(
|
||||||
|
label="Generated Speech",
|
||||||
|
type="filepath",
|
||||||
|
waveform_options={"waveform_color": "#4C87AB"}
|
||||||
|
)
|
||||||
|
|
||||||
# Create file-related components with visible=False when local saving is disabled
|
# Create file-related components with visible=False when local saving is disabled
|
||||||
gr.Markdown("### Generated Files", visible=not disable_local_saving)
|
gr.Markdown("### Generated Files", visible=not disable_local_saving)
|
||||||
|
@ -17,7 +21,7 @@ def create_output_column(disable_local_saving: bool = False) -> Tuple[gr.Column,
|
||||||
label="Previous Outputs",
|
label="Previous Outputs",
|
||||||
choices=files.list_output_files() if not disable_local_saving else [],
|
choices=files.list_output_files() if not disable_local_saving else [],
|
||||||
value=None,
|
value=None,
|
||||||
allow_custom_value=False,
|
allow_custom_value=True,
|
||||||
visible=not disable_local_saving,
|
visible=not disable_local_saving,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -11,12 +11,14 @@ def list_input_files() -> List[str]:
|
||||||
|
|
||||||
|
|
||||||
def list_output_files() -> List[str]:
|
def list_output_files() -> List[str]:
|
||||||
"""List all output audio files."""
|
"""List all output audio files, sorted by most recent first."""
|
||||||
return [
|
files = [
|
||||||
os.path.join(OUTPUTS_DIR, f)
|
os.path.join(OUTPUTS_DIR, f)
|
||||||
for f in os.listdir(OUTPUTS_DIR)
|
for f in os.listdir(OUTPUTS_DIR)
|
||||||
if any(f.endswith(ext) for ext in AUDIO_FORMATS)
|
if any(f.endswith(ext) for ext in AUDIO_FORMATS)
|
||||||
]
|
]
|
||||||
|
# Sort files by modification time, most recent first
|
||||||
|
return sorted(files, key=os.path.getmtime, reverse=True)
|
||||||
|
|
||||||
|
|
||||||
def read_text_file(filename: str) -> str:
|
def read_text_file(filename: str) -> str:
|
||||||
|
|
|
@ -6,7 +6,7 @@ import gradio as gr
|
||||||
from . import api, files
|
from . import api, files
|
||||||
|
|
||||||
|
|
||||||
def setup_event_handlers(components: dict):
|
def setup_event_handlers(components: dict, disable_local_saving: bool = False):
|
||||||
"""Set up all event handlers for the UI components."""
|
"""Set up all event handlers for the UI components."""
|
||||||
|
|
||||||
def refresh_status():
|
def refresh_status():
|
||||||
|
@ -58,27 +58,37 @@ def setup_event_handlers(components: dict):
|
||||||
|
|
||||||
def handle_file_upload(file):
|
def handle_file_upload(file):
|
||||||
if file is None:
|
if file is None:
|
||||||
return gr.update(choices=files.list_input_files())
|
return "" if disable_local_saving else [gr.update(choices=files.list_input_files())]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Copy file to inputs directory
|
# Read the file content
|
||||||
filename = os.path.basename(file.name)
|
with open(file.name, 'r', encoding='utf-8') as f:
|
||||||
target_path = os.path.join(files.INPUTS_DIR, filename)
|
text_content = f.read()
|
||||||
|
|
||||||
# Handle duplicate filenames
|
if disable_local_saving:
|
||||||
base, ext = os.path.splitext(filename)
|
# When saving is disabled, put content directly in text input
|
||||||
counter = 1
|
# Normalize whitespace by replacing newlines with spaces
|
||||||
while os.path.exists(target_path):
|
normalized_text = ' '.join(text_content.split())
|
||||||
new_name = f"{base}_{counter}{ext}"
|
return normalized_text
|
||||||
target_path = os.path.join(files.INPUTS_DIR, new_name)
|
else:
|
||||||
counter += 1
|
# When saving is enabled, save file and update dropdown
|
||||||
|
filename = os.path.basename(file.name)
|
||||||
|
target_path = os.path.join(files.INPUTS_DIR, filename)
|
||||||
|
|
||||||
shutil.copy2(file.name, target_path)
|
# Handle duplicate filenames
|
||||||
|
base, ext = os.path.splitext(filename)
|
||||||
|
counter = 1
|
||||||
|
while os.path.exists(target_path):
|
||||||
|
new_name = f"{base}_{counter}{ext}"
|
||||||
|
target_path = os.path.join(files.INPUTS_DIR, new_name)
|
||||||
|
counter += 1
|
||||||
|
|
||||||
|
shutil.copy2(file.name, target_path)
|
||||||
|
return [gr.update(choices=files.list_input_files())]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error uploading file: {e}")
|
print(f"Error handling file: {e}")
|
||||||
|
return "" if disable_local_saving else [gr.update(choices=files.list_input_files())]
|
||||||
return gr.update(choices=files.list_input_files())
|
|
||||||
|
|
||||||
def generate_from_text(text, voice, format, speed):
|
def generate_from_text(text, voice, format, speed):
|
||||||
"""Generate speech from direct text input"""
|
"""Generate speech from direct text input"""
|
||||||
|
@ -91,7 +101,10 @@ def setup_event_handlers(components: dict):
|
||||||
gr.Warning("Please enter text in the input box")
|
gr.Warning("Please enter text in the input box")
|
||||||
return [None, gr.update(choices=files.list_output_files())]
|
return [None, gr.update(choices=files.list_output_files())]
|
||||||
|
|
||||||
files.save_text(text)
|
# Only save text if local saving is enabled
|
||||||
|
if not disable_local_saving:
|
||||||
|
files.save_text(text)
|
||||||
|
|
||||||
result = api.text_to_speech(text, voice, format, speed)
|
result = api.text_to_speech(text, voice, format, speed)
|
||||||
if result is None:
|
if result is None:
|
||||||
gr.Warning("Failed to generate speech. Please try again.")
|
gr.Warning("Failed to generate speech. Please try again.")
|
||||||
|
@ -162,45 +175,7 @@ def setup_event_handlers(components: dict):
|
||||||
outputs=[components["model"]["status_btn"], components["model"]["voice"]],
|
outputs=[components["model"]["status_btn"], components["model"]["voice"]],
|
||||||
)
|
)
|
||||||
|
|
||||||
components["input"]["file_select"].change(
|
# Connect text submit button (always present)
|
||||||
fn=handle_file_select,
|
|
||||||
inputs=[components["input"]["file_select"]],
|
|
||||||
outputs=[components["input"]["file_preview"]],
|
|
||||||
)
|
|
||||||
|
|
||||||
components["input"]["file_upload"].upload(
|
|
||||||
fn=handle_file_upload,
|
|
||||||
inputs=[components["input"]["file_upload"]],
|
|
||||||
outputs=[components["input"]["file_select"]],
|
|
||||||
)
|
|
||||||
|
|
||||||
components["output"]["play_btn"].click(
|
|
||||||
fn=play_selected,
|
|
||||||
inputs=[components["output"]["output_files"]],
|
|
||||||
outputs=[components["output"]["selected_audio"]],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Connect clear files button
|
|
||||||
components["input"]["clear_files"].click(
|
|
||||||
fn=clear_files,
|
|
||||||
inputs=[
|
|
||||||
components["model"]["voice"],
|
|
||||||
components["model"]["format"],
|
|
||||||
components["model"]["speed"],
|
|
||||||
],
|
|
||||||
outputs=[
|
|
||||||
components["input"]["file_select"],
|
|
||||||
components["input"]["file_upload"],
|
|
||||||
components["input"]["file_preview"],
|
|
||||||
components["output"]["audio_output"],
|
|
||||||
components["output"]["output_files"],
|
|
||||||
components["model"]["voice"],
|
|
||||||
components["model"]["format"],
|
|
||||||
components["model"]["speed"],
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Connect submit buttons for each tab
|
|
||||||
components["input"]["text_submit"].click(
|
components["input"]["text_submit"].click(
|
||||||
fn=generate_from_text,
|
fn=generate_from_text,
|
||||||
inputs=[
|
inputs=[
|
||||||
|
@ -215,26 +190,70 @@ def setup_event_handlers(components: dict):
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
# Connect clear outputs button
|
# Only connect file-related handlers if components exist
|
||||||
components["output"]["clear_outputs"].click(
|
if components["input"]["file_select"] is not None:
|
||||||
fn=clear_outputs,
|
components["input"]["file_select"].change(
|
||||||
outputs=[
|
fn=handle_file_select,
|
||||||
components["output"]["audio_output"],
|
inputs=[components["input"]["file_select"]],
|
||||||
components["output"]["output_files"],
|
outputs=[components["input"]["file_preview"]],
|
||||||
components["output"]["selected_audio"],
|
)
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
components["input"]["file_submit"].click(
|
if components["input"]["file_upload"] is not None:
|
||||||
fn=generate_from_file,
|
# File upload handler - output depends on disable_local_saving
|
||||||
inputs=[
|
components["input"]["file_upload"].upload(
|
||||||
components["input"]["file_select"],
|
fn=handle_file_upload,
|
||||||
components["model"]["voice"],
|
inputs=[components["input"]["file_upload"]],
|
||||||
components["model"]["format"],
|
outputs=[components["input"]["text_input"] if disable_local_saving else components["input"]["file_select"]],
|
||||||
components["model"]["speed"],
|
)
|
||||||
],
|
|
||||||
outputs=[
|
if components["output"]["play_btn"] is not None:
|
||||||
components["output"]["audio_output"],
|
components["output"]["play_btn"].click(
|
||||||
components["output"]["output_files"],
|
fn=play_selected,
|
||||||
],
|
inputs=[components["output"]["output_files"]],
|
||||||
)
|
outputs=[components["output"]["selected_audio"]],
|
||||||
|
)
|
||||||
|
|
||||||
|
if components["input"]["clear_files"] is not None:
|
||||||
|
components["input"]["clear_files"].click(
|
||||||
|
fn=clear_files,
|
||||||
|
inputs=[
|
||||||
|
components["model"]["voice"],
|
||||||
|
components["model"]["format"],
|
||||||
|
components["model"]["speed"],
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
components["input"]["file_select"],
|
||||||
|
components["input"]["file_upload"],
|
||||||
|
components["input"]["file_preview"],
|
||||||
|
components["output"]["audio_output"],
|
||||||
|
components["output"]["output_files"],
|
||||||
|
components["model"]["voice"],
|
||||||
|
components["model"]["format"],
|
||||||
|
components["model"]["speed"],
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
if components["output"]["clear_outputs"] is not None:
|
||||||
|
components["output"]["clear_outputs"].click(
|
||||||
|
fn=clear_outputs,
|
||||||
|
outputs=[
|
||||||
|
components["output"]["audio_output"],
|
||||||
|
components["output"]["output_files"],
|
||||||
|
components["output"]["selected_audio"],
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
if components["input"]["file_submit"] is not None:
|
||||||
|
components["input"]["file_submit"].click(
|
||||||
|
fn=generate_from_file,
|
||||||
|
inputs=[
|
||||||
|
components["input"]["file_select"],
|
||||||
|
components["model"]["voice"],
|
||||||
|
components["model"]["format"],
|
||||||
|
components["model"]["speed"],
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
components["output"]["audio_output"],
|
||||||
|
components["output"]["output_files"],
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
|
@ -26,7 +26,7 @@ def create_interface():
|
||||||
# Main interface
|
# Main interface
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
# Create columns
|
# Create columns
|
||||||
input_col, input_components = create_input_column()
|
input_col, input_components = create_input_column(disable_local_saving)
|
||||||
model_col, model_components = create_model_column(
|
model_col, model_components = create_model_column(
|
||||||
available_voices
|
available_voices
|
||||||
) # Pass initial voices
|
) # Pass initial voices
|
||||||
|
@ -40,7 +40,7 @@ def create_interface():
|
||||||
}
|
}
|
||||||
|
|
||||||
# Set up event handlers
|
# Set up event handlers
|
||||||
setup_event_handlers(components)
|
setup_event_handlers(components, disable_local_saving)
|
||||||
|
|
||||||
# Add periodic status check with Timer
|
# Add periodic status check with Timer
|
||||||
def update_status():
|
def update_status():
|
||||||
|
|
|
@ -106,24 +106,54 @@ def test_get_status_html_unavailable():
|
||||||
|
|
||||||
def test_text_to_speech_api_params(mock_response, tmp_path):
|
def test_text_to_speech_api_params(mock_response, tmp_path):
|
||||||
"""Test correct API parameters are sent"""
|
"""Test correct API parameters are sent"""
|
||||||
with patch("requests.post") as mock_post, patch(
|
test_cases = [
|
||||||
"ui.lib.api.OUTPUTS_DIR", str(tmp_path)
|
# Single voice as string
|
||||||
), patch("builtins.open", mock_open()):
|
("voice1", "voice1"),
|
||||||
mock_post.return_value = mock_response({})
|
# Multiple voices as list
|
||||||
api.text_to_speech("test text", "voice1", "mp3", 1.5)
|
(["voice1", "voice2"], "voice1+voice2"),
|
||||||
|
# Single voice as list
|
||||||
|
(["voice1"], "voice1"),
|
||||||
|
]
|
||||||
|
|
||||||
mock_post.assert_called_once()
|
for input_voice, expected_voice in test_cases:
|
||||||
args, kwargs = mock_post.call_args
|
with patch("requests.post") as mock_post, patch(
|
||||||
|
"ui.lib.api.OUTPUTS_DIR", str(tmp_path)
|
||||||
|
), patch("builtins.open", mock_open()):
|
||||||
|
mock_post.return_value = mock_response({})
|
||||||
|
api.text_to_speech("test text", input_voice, "mp3", 1.5)
|
||||||
|
|
||||||
# Check request body
|
mock_post.assert_called_once()
|
||||||
assert kwargs["json"] == {
|
args, kwargs = mock_post.call_args
|
||||||
"model": "kokoro",
|
|
||||||
"input": "test text",
|
|
||||||
"voice": "voice1",
|
|
||||||
"response_format": "mp3",
|
|
||||||
"speed": 1.5,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Check headers and timeout
|
# Check request body
|
||||||
assert kwargs["headers"] == {"Content-Type": "application/json"}
|
assert kwargs["json"] == {
|
||||||
assert kwargs["timeout"] == 300
|
"model": "kokoro",
|
||||||
|
"input": "test text",
|
||||||
|
"voice": expected_voice,
|
||||||
|
"response_format": "mp3",
|
||||||
|
"speed": 1.5,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check headers and timeout
|
||||||
|
assert kwargs["headers"] == {"Content-Type": "application/json"}
|
||||||
|
assert kwargs["timeout"] == 300
|
||||||
|
|
||||||
|
|
||||||
|
def test_text_to_speech_output_filename(mock_response, tmp_path):
|
||||||
|
"""Test output filename contains correct voice identifier"""
|
||||||
|
test_cases = [
|
||||||
|
# Single voice
|
||||||
|
("voice1", lambda f: "voice-voice1" in f),
|
||||||
|
# Multiple voices
|
||||||
|
(["voice1", "voice2"], lambda f: "voice-voice1+voice2" in f),
|
||||||
|
]
|
||||||
|
|
||||||
|
for input_voice, filename_check in test_cases:
|
||||||
|
with patch("requests.post", return_value=mock_response({})), patch(
|
||||||
|
"ui.lib.api.OUTPUTS_DIR", str(tmp_path)
|
||||||
|
), patch("builtins.open", mock_open()) as mock_file:
|
||||||
|
result = api.text_to_speech("test text", input_voice, "mp3", 1.0)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert filename_check(result), f"Expected voice pattern not found in filename: {result}"
|
||||||
|
mock_file.assert_called_once()
|
||||||
|
|
|
@ -36,8 +36,10 @@ def test_model_column_default_values():
|
||||||
expected_choices = [(voice_id, voice_id) for voice_id in voice_ids]
|
expected_choices = [(voice_id, voice_id) for voice_id in voice_ids]
|
||||||
assert components["voice"].choices == expected_choices
|
assert components["voice"].choices == expected_choices
|
||||||
# Value is not converted to tuple format for the value property
|
# Value is not converted to tuple format for the value property
|
||||||
assert components["voice"].value == voice_ids[0]
|
assert components["voice"].value == [voice_ids[0]]
|
||||||
assert components["voice"].interactive is True
|
assert components["voice"].interactive is True
|
||||||
|
assert components["voice"].multiselect is True
|
||||||
|
assert components["voice"].label == "Voice(s)"
|
||||||
|
|
||||||
# Test format dropdown
|
# Test format dropdown
|
||||||
# Gradio Dropdown converts choices to (value, label) tuples
|
# Gradio Dropdown converts choices to (value, label) tuples
|
||||||
|
|
|
@ -136,7 +136,7 @@ def test_interface_components_presence():
|
||||||
|
|
||||||
required_components = {
|
required_components = {
|
||||||
"Text to speak",
|
"Text to speak",
|
||||||
"Voice",
|
"Voice(s)",
|
||||||
"Audio Format",
|
"Audio Format",
|
||||||
"Speed",
|
"Speed",
|
||||||
"Generated Speech",
|
"Generated Speech",
|
||||||
|
|
Loading…
Add table
Reference in a new issue