v1_0 full migration, captions, gpu, cpu, webui updates

remsky 2025-02-05 00:46:01 -07:00
parent 6c234a3b67
commit d3741d0d99
22 changed files with 356 additions and 166 deletions

View file

@@ -14,7 +14,6 @@ class Settings(BaseSettings):
output_dir_size_limit_mb: float = 500.0 # Maximum size of output directory in MB
default_voice: str = "af_heart"
use_gpu: bool = True # Whether to use GPU acceleration if available
use_onnx: bool = False # Whether to use ONNX runtime
allow_local_voice_saving: bool = False # Whether to allow saving combined voices locally
# Container absolute paths
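Because these fields live on a pydantic BaseSettings class, each can be overridden from the environment. A minimal sketch, assuming pydantic's default case-insensitive field-to-env-var mapping:
# Hypothetical overrides; names mirror the Settings fields above
export USE_GPU=false
export ALLOW_LOCAL_VOICE_SAVING=true
export DEFAULT_VOICE=af_heart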

View file

@@ -1,14 +1,14 @@
{
"models": {
"tts-1": "kokoro-v0_19",
"tts-1-hd": "kokoro-v0_19",
"kokoro": "kokoro-v0_19"
"tts-1": "kokoro-v1_0",
"tts-1-hd": "kokoro-v1_0",
"kokoro": "kokoro-v1_0"
},
"voices": {
"alloy": "am_adam",
"ash": "af_nicole",
"coral": "bf_emma",
"echo": "af_bella",
"alloy": "am_v0adam",
"ash": "af_v0nicole",
"coral": "bf_v0emma",
"echo": "af_v0bella",
"fable": "af_sarah",
"onyx": "bm_george",
"nova": "bf_isabella",

View file

@@ -68,9 +68,7 @@ def get_model_name(model: str) -> str:
base_name = _openai_mappings["models"].get(model)
if not base_name:
raise ValueError(f"Unsupported model: {model}")
# Add extension based on runtime config
extension = ".onnx" if settings.use_onnx else ".pth"
return base_name + extension
return base_name + ".pth"
async def process_voices(
@@ -378,6 +376,17 @@ async def combine_voices(request: Union[str, List[str]]):
- 400: Invalid request (wrong number of voices, voice not found)
- 500: Server error (file system issues, combination failed)
"""
# Check if local voice saving is allowed
if not settings.allow_local_voice_saving:
raise HTTPException(
status_code=403,
detail={
"error": "permission_denied",
"message": "Local voice saving is disabled",
"type": "permission_error"
}
)
try:
# Convert input to list of voices
if isinstance(request, str):
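To illustrate the new guard, a sketch of the rejected case; the route path is an assumption based on this router, and FastAPI wraps the HTTPException detail under a top-level "detail" key:
# Assumed endpoint; with allow_local_voice_saving at its default (false)
curl -s -X POST http://localhost:8880/v1/audio/voices/combine \
  -H "Content-Type: application/json" \
  -d '["af_heart", "af_bella"]'
# Expected HTTP 403 body:
# {"detail": {"error": "permission_denied", "message": "Local voice saving is disabled", "type": "permission_error"}}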

View file

@@ -29,8 +29,8 @@ def mock_openai_mappings():
"""Mock OpenAI mappings for testing."""
with patch("api.src.routers.openai_compatible._openai_mappings", {
"models": {
"tts-1": "kokoro-v0_19",
"tts-1-hd": "kokoro-v0_19"
"tts-1": "kokoro-v1_0",
"tts-1-hd": "kokoro-v1_0"
},
"voices": {
"alloy": "am_adam",

docker/cpu/.dockerignore Normal file
View file

@@ -0,0 +1,40 @@
# Version control
.git
# Python
__pycache__
*.pyc
*.pyo
*.pyd
.Python
*.py[cod]
*$py.class
.pytest_cache
.coverage
.coveragerc
# Environment
# .env
.venv
env/
venv/
ENV/
# IDE
.idea
.vscode
*.swp
*.swo
# Project specific
examples/
Kokoro-82M/
ui/
tests/
*.md
*.txt
!requirements.txt
# Docker
Dockerfile*
docker-compose*

View file

@@ -19,7 +19,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
RUN useradd -m -u 1000 appuser
# Create directories and set ownership
RUN mkdir -p /app/api/src/voices/v1_0 && \
RUN mkdir -p /app/api/src/models/v1_0 && \
chown -R appuser:appuser /app
USER appuser
@@ -49,19 +49,14 @@ ENV PYTHONUNBUFFERED=1 \
UV_LINK_MODE=copy
# Core settings that differ from config.py defaults
ENV USE_GPU=false \
USE_ONNX=true
ENV USE_GPU=false
# Model download flags (container-specific)
ENV DOWNLOAD_ONNX=false \
DOWNLOAD_PTH=false
ENV DOWNLOAD_MODEL=false
# Download models based on environment variables
RUN if [ "$DOWNLOAD_ONNX" = "true" ]; then \
python download_model.py --type onnx; \
fi && \
if [ "$DOWNLOAD_PTH" = "true" ]; then \
python download_model.py --type pth; \
# Download model if enabled
RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
python download_model.py --output api/src/models/v1_0; \
fi
# Run FastAPI server
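DOWNLOAD_MODEL defaults to false here, so a CPU build stays model-free and defers the download to container start (see entrypoint.sh below). A sketch with illustrative image tags:
# Build from the repo root; no model is baked into the image
docker build -f docker/cpu/Dockerfile -t kokoro-fastapi-cpu .
# Flip the flag at run time to fetch the model on startup
docker run -e DOWNLOAD_MODEL=true -p 8880:8880 kokoro-fastapi-cpu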

View file

@@ -1,7 +1,7 @@
name: kokoro-tts
services:
kokoro-tts:
# image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.1.0
# image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.2.0
build:
context: ../..
dockerfile: docker/cpu/Dockerfile
@@ -21,7 +21,7 @@ services:
# # Gradio UI service [Comment out everything below if you don't need it]
# gradio-ui:
# image: ghcr.io/remsky/kokoro-fastapi-ui:v0.1.0
# image: ghcr.io/remsky/kokoro-fastapi-ui:v0.2.0
# # Uncomment below (and comment out above) to build from source instead of using the released image
# build:
# context: ../../ui
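The same service comes up through compose; a minimal sketch, run from the repo root:
docker compose -f docker/cpu/docker-compose.yml up --build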

docker/gpu/.dockerignore Normal file
View file

@@ -0,0 +1,40 @@
# Version control
.git
# Python
__pycache__
*.pyc
*.pyo
*.pyd
.Python
*.py[cod]
*$py.class
.pytest_cache
.coverage
.coveragerc
# Environment
# .env
.venv
env/
venv/
ENV/
# IDE
.idea
.vscode
*.swp
*.swo
# Project specific
examples/
Kokoro-82M/
ui/
tests/
*.md
*.txt
!requirements.txt
# Docker
Dockerfile*
docker-compose*

View file

@@ -16,11 +16,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# Install UV using the installer script
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
mv /root/.local/bin/uv /usr/local/bin/ && \
mv /root/.local/bin/uvx /usr/local/bin/
# Create non-root user and prepare /app in one layer
RUN useradd -m -u 1000 appuser && \
mkdir -p /app/api/src/voices/v1_0 && \
mv /root/.local/bin/uvx /usr/local/bin/ && \
useradd -m -u 1000 appuser && \
mkdir -p /app/api/src/models/v1_0 && \
chown -R appuser:appuser /app
USER appuser
@@ -41,10 +39,6 @@ COPY --chown=appuser:appuser docker/scripts/download_model.* ./
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --extra gpu
# Copy scripts and make them executable in a single RUN step
COPY --chown=appuser:appuser docker/scripts/ /app/docker/scripts/
RUN chmod +x docker/scripts/entrypoint.sh docker/scripts/download_model.sh
# Set all environment variables in one go
ENV PYTHONUNBUFFERED=1 \
PYTHONPATH=/app:/app/api \
@@ -52,8 +46,12 @@ ENV PYTHONUNBUFFERED=1 \
UV_LINK_MODE=copy \
USE_GPU=true \
USE_ONNX=false \
DOWNLOAD_PTH=true \
DOWNLOAD_ONNX=false
DOWNLOAD_MODEL=true
# Download model if enabled
RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
python download_model.py --output api/src/models/v1_0; \
fi
# Run FastAPI server
CMD ["/app/docker/scripts/entrypoint.sh"]
CMD ["uv", "run", "python", "-m", "uvicorn", "api.src.main:app", "--host", "0.0.0.0", "--port", "8880", "--log-level", "debug"]

View file

@@ -1,7 +1,7 @@
name: kokoro-tts
services:
kokoro-tts:
# image: ghcr.io/remsky/kokoro-fastapi-gpu:v0.1.0
# image: ghcr.io/remsky/kokoro-fastapi-gpu:v0.2.0
build:
context: ../..
dockerfile: docker/gpu/Dockerfile
@@ -24,7 +24,7 @@ services:
# # Gradio UI service
# gradio-ui:
# image: ghcr.io/remsky/kokoro-fastapi-ui:v0.1.0
# image: ghcr.io/remsky/kokoro-fastapi-ui:v0.2.0
# # Uncomment below to build from source instead of using the released image
# # build:
# # context: ../../ui

View file

@@ -1,62 +1,84 @@
#!/usr/bin/env python3
"""Download and prepare Kokoro model for Docker build."""
"""Download and prepare Kokoro v1.0 model."""
import argparse
import json
import os
import shutil
from pathlib import Path
from urllib.request import urlretrieve
import torch
from huggingface_hub import hf_hub_download
from loguru import logger
def download_model(version: str, output_dir: str) -> None:
"""Download model files from HuggingFace.
def verify_files(model_path: str, config_path: str) -> bool:
"""Verify that model files exist and are valid.
Args:
model_path: Path to model file
config_path: Path to config file
Returns:
True if files exist and are valid
"""
try:
# Check files exist
if not os.path.exists(model_path):
return False
if not os.path.exists(config_path):
return False
# Verify config file is valid JSON
with open(config_path) as f:
config = json.load(f)
# Check model file size (should be non-zero)
if os.path.getsize(model_path) == 0:
return False
return True
except Exception:
return False
def download_model(output_dir: str) -> None:
"""Download model files from GitHub release.
Args:
version: Model version to download
output_dir: Directory to save model files
"""
try:
logger.info(f"Downloading Kokoro model version {version}")
# Create output directory
os.makedirs(output_dir, exist_ok=True)
# Download model files
model_file = hf_hub_download(
repo_id="hexgrad/Kokoro-82M",
filename=f"kokoro-{version}.pth"
)
config_file = hf_hub_download(
repo_id="hexgrad/Kokoro-82M",
filename="config.json"
)
# Define file paths
model_file = "kokoro-v1_0.pth"
config_file = "config.json"
model_path = os.path.join(output_dir, model_file)
config_path = os.path.join(output_dir, config_file)
# Copy to output directory
shutil.copy2(model_file, os.path.join(output_dir, "model.pt"))
shutil.copy2(config_file, os.path.join(output_dir, "config.json"))
# Verify files
model_path = os.path.join(output_dir, "model.pt")
config_path = os.path.join(output_dir, "config.json")
if not os.path.exists(model_path):
raise RuntimeError(f"Model file not found: {model_path}")
if not os.path.exists(config_path):
raise RuntimeError(f"Config file not found: {config_path}")
# Check if files already exist and are valid
if verify_files(model_path, config_path):
logger.info("Model files already exist and are valid")
return
# Load and verify model
logger.info("Verifying model files...")
with open(config_path) as f:
config = json.load(f)
logger.info(f"Loaded config: {config}")
logger.info("Downloading Kokoro v1.0 model files")
model = torch.load(model_path, map_location="cpu")
logger.info(f"Loaded model with keys: {model.keys()}")
# GitHub release URLs (to be updated with v0.2.0 release)
base_url = "https://github.com/remsky/Kokoro-FastAPI/releases/download/v0.2.0"
model_url = f"{base_url}/{model_file}"
config_url = f"{base_url}/{config_file}"
# Download files
logger.info("Downloading model file...")
urlretrieve(model_url, model_path)
logger.info("Downloading config file...")
urlretrieve(config_url, config_path)
# Verify downloaded files
if not verify_files(model_path, config_path):
raise RuntimeError("Failed to verify downloaded files")
logger.info(f"✓ Model files prepared in {output_dir}")
except Exception as e:
@@ -66,12 +88,9 @@ def download_model(version: str, output_dir: str) -> None:
def main():
"""Main entry point."""
parser = argparse.ArgumentParser(description="Download Kokoro model for Docker build")
parser.add_argument(
"--version",
default="v1_0",
help="Model version to download"
)
import argparse
parser = argparse.ArgumentParser(description="Download Kokoro v1.0 model")
parser.add_argument(
"--output",
required=True,
@@ -79,7 +98,7 @@ def main():
)
args = parser.parse_args()
download_model(args.version, args.output)
download_model(args.output)
if __name__ == "__main__":
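Outside Docker, the same script prefetches the weights, and re-runs are cheap because verify_files() short-circuits when valid files are already present:
# Run from the repo root; --output is now the only argument
python docker/scripts/download_model.py --output api/src/models/v1_0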

View file

@@ -19,21 +19,38 @@ find_project_root() {
exit 1
}
# Function to verify files exist and are valid
verify_files() {
local model_path="$1"
local config_path="$2"
# Check files exist
if [ ! -f "$model_path" ] || [ ! -f "$config_path" ]; then
return 1
fi
# Check files are not empty
if [ ! -s "$model_path" ] || [ ! -s "$config_path" ]; then
return 1
fi
# Try to parse config.json
if ! jq . "$config_path" >/dev/null 2>&1; then
return 1
fi
return 0
}
# Function to download a file
download_file() {
local url="$1"
local output_dir="$2"
local model_type="$3"
local filename=$(basename "$url")
# Validate file extension
if [[ ! "$filename" =~ \.$model_type$ ]]; then
echo "Warning: $filename is not a .$model_type file" >&2
return 1
fi
local output_path="$2"
local filename=$(basename "$output_path")
echo "Downloading $filename..."
if curl -L "$url" -o "$output_dir/$filename"; then
mkdir -p "$(dirname "$output_path")"
if curl -L "$url" -o "$output_path"; then
echo "Successfully downloaded $filename"
return 0
else
@@ -42,69 +59,49 @@ download_file() {
fi
}
# Parse arguments
MODEL_TYPE=""
while [[ $# -gt 0 ]]; do
case $1 in
--type)
MODEL_TYPE="$2"
shift 2
;;
*)
# If no flag specified, treat remaining args as model URLs
break
;;
esac
done
# Validate model type
if [ "$MODEL_TYPE" != "pth" ] && [ "$MODEL_TYPE" != "onnx" ]; then
echo "Error: Must specify model type with --type (pth or onnx)" >&2
exit 1
fi
# Find project root and ensure models directory exists
PROJECT_ROOT=$(find_project_root)
if [ $? -ne 0 ]; then
exit 1
fi
MODELS_DIR="$PROJECT_ROOT/api/src/models"
echo "Downloading models to $MODELS_DIR"
mkdir -p "$MODELS_DIR"
MODEL_DIR="$PROJECT_ROOT/api/src/models/v1_0"
echo "Model directory: $MODEL_DIR"
mkdir -p "$MODEL_DIR"
# Default models if no arguments provided
if [ "$MODEL_TYPE" = "pth" ]; then
DEFAULT_MODELS=(
"https://github.com/remsky/Kokoro-FastAPI/releases/download/v0.1.0/kokoro-v0_19.pth"
"https://github.com/remsky/Kokoro-FastAPI/releases/download/v0.1.0/kokoro-v0_19-half.pth"
)
else
DEFAULT_MODELS=(
"https://github.com/remsky/Kokoro-FastAPI/releases/download/v0.1.0/kokoro-v0_19.onnx"
"https://github.com/remsky/Kokoro-FastAPI/releases/download/v0.1.0/kokoro-v0_19_fp16.onnx"
)
# Define file paths
MODEL_FILE="kokoro-v1_0.pth"
CONFIG_FILE="config.json"
MODEL_PATH="$MODEL_DIR/$MODEL_FILE"
CONFIG_PATH="$MODEL_DIR/$CONFIG_FILE"
# Check if files already exist and are valid
if verify_files "$MODEL_PATH" "$CONFIG_PATH"; then
echo "Model files already exist and are valid"
exit 0
fi
# Use provided models or default
if [ $# -gt 0 ]; then
MODELS=("$@")
else
MODELS=("${DEFAULT_MODELS[@]}")
fi
# Define URLs
BASE_URL="https://github.com/remsky/Kokoro-FastAPI/releases/download/v0.2.0"
MODEL_URL="$BASE_URL/$MODEL_FILE"
CONFIG_URL="$BASE_URL/$CONFIG_FILE"
# Download all models
# Download files
success=true
for model in "${MODELS[@]}"; do
if ! download_file "$model" "$MODELS_DIR" "$MODEL_TYPE"; then
success=false
fi
done
if [ "$success" = true ]; then
echo "${MODEL_TYPE^^} model download complete!"
if ! download_file "$MODEL_URL" "$MODEL_PATH"; then
success=false
fi
if ! download_file "$CONFIG_URL" "$CONFIG_PATH"; then
success=false
fi
# Verify downloaded files
if [ "$success" = true ] && verify_files "$MODEL_PATH" "$CONFIG_PATH"; then
echo "✓ Model files prepared in $MODEL_DIR"
exit 0
else
echo "Some downloads failed" >&2
echo "Failed to download or verify model files" >&2
exit 1
fi
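Note that verify_files shells out to jq, so the script now depends on jq as well as curl. Typical usage from the repo root:
# Exits 0 immediately if valid files are already in place
./docker/scripts/download_model.sh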

View file

@@ -1,12 +1,8 @@
#!/bin/bash
set -e
if [ "$DOWNLOAD_PTH" = "true" ]; then
python docker/scripts/download_model.py --type pth
fi
if [ "$DOWNLOAD_ONNX" = "true" ]; then
python docker/scripts/download_model.py --type onnx
if [ "$DOWNLOAD_MODEL" = "true" ]; then
python docker/scripts/download_model.py --output api/src/models/v1_0
fi
exec uv run python -m uvicorn api.src.main:app --host 0.0.0.0 --port 8880 --log-level debug
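The entrypoint honors the same flag at run time, so a local invocation can opt in explicitly; a minimal sketch:
DOWNLOAD_MODEL=true ./docker/scripts/entrypoint.sh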

View file

@@ -54,7 +54,7 @@ def main():
examples = [
"Hello world! Welcome to the captioned speech system.",
"The quick brown fox jumps over the lazy dog.",
"""If you have access to a room where gasoline is stored, remember that gas vapor accumulating in a closed room will explode after a time if you leave a candle burning in the room. A good deal of evaporation, however, must occur from the gasoline tins into the air of the room. If removal of the tops of the tins does not expose enough gasoline to the air to ensure copious evaporation, you can open lightly constructed tins further with a knife, ice pick or sharpened nail file. Or puncture a tiny hole in the tank which will permit gasoline to leak out on the floor. This will greatly increase the rate of evaporation. Before you light your candle, be sure that windows are closed and the room is as air-tight as you can make it. If you can see that windows in a neighboring room are opened wide, you have a chance of setting a large fire which will not only destroy the gasoline but anything else nearby; when the gasoline explodes, the doors of the storage room will be blown open, a draft to the neighboring windows will be created which will whip up a fine conflagration"""
"""Of course if you come to the place fresh from New York, you are deceived. Your standard of vision is all astray, You do think the place is quiet. You do imagine that Mr. Smith is asleep merely because he closes his eyes as he stands. But live in Mariposa for six months or a year and then you will begin to understand it better; the buildings get higher and higher; the Mariposa House grows more and more luxurious; McCarthy's block towers to the sky; the 'buses roar and hum to the station; the trains shriek; the traffic multiplies; the people move faster and faster; a dense crowd swirls to and fro in the post-office and the five and ten cent store—and amusements! well, now! lacrosse, baseball, excursions, dances, the Fireman's Ball every winter and the Catholic picnic every summer; and music—the town band in the park every Wednesday evening, and the Oddfellows' brass band on the street every other Friday; the Mariposa Quartette, the Salvation Army—why, after a few months' residence you begin to realize that the place is a mere mad round of gaiety."""
]
print("Generating captioned speech for example texts...\n")

View file

@@ -34,7 +34,6 @@ dependencies = [
# "html2text>=2024.2.26",
"pydub>=0.25.1",
"matplotlib>=3.10.0",
"semchunk>=3.0.1",
"mutagen>=1.47.0",
"psutil>=6.1.1",
"kokoro==0.7.4",
@@ -46,11 +45,11 @@ dependencies = [
[project.optional-dependencies]
gpu = [
"torch==2.5.1+cu121",
"onnxruntime-gpu==1.20.1",
#"onnxruntime-gpu==1.20.1",
]
cpu = [
"torch==2.5.1",
"onnxruntime==1.20.1",
#"onnxruntime==1.20.1",
]
test = [
"pytest==8.0.0",

slim.report.json Normal file
View file

@@ -0,0 +1,49 @@
{
"document": "doc.report.command",
"version": "ov/command/slim/1.1",
"engine": "linux/amd64|ALP|x.1.42.2|29e62e7836de7b1004607c51c502537ffe1969f0|2025-01-16_07:48:54AM|x",
"containerized": false,
"host_distro": {
"name": "Ubuntu",
"version": "22.04",
"display_name": "Ubuntu 22.04.5 LTS"
},
"type": "slim",
"state": "error",
"target_reference": "kokoro-fastapi:latest",
"system": {
"type": "",
"release": "",
"distro": {
"name": "",
"version": "",
"display_name": ""
}
},
"source_image": {
"identity": {
"id": ""
},
"size": 0,
"size_human": "",
"create_time": "",
"architecture": "",
"container_entry": {
"exe_path": ""
}
},
"minified_image_size": 0,
"minified_image_size_human": "",
"minified_image": "",
"minified_image_id": "",
"minified_image_digest": "",
"minified_image_has_data": false,
"minified_by": 0,
"artifact_location": "",
"container_report_name": "",
"seccomp_profile_name": "",
"apparmor_profile_name": "",
"image_stack": null,
"image_created": false,
"image_build_engine": ""
}

View file

@@ -84,6 +84,10 @@
</div>
</div>
<div class="button-group">
<input type="file" id="file-input" accept=".txt" style="display: none;">
<button id="upload-btn" class="clear-btn">
Upload Text
</button>
<button id="clear-btn" class="clear-btn">
Clear Text
</button>

View file

@@ -13,6 +13,8 @@ export class App {
generateBtnText: document.querySelector('#generate-btn .btn-text'),
generateBtnLoader: document.querySelector('#generate-btn .loader'),
downloadBtn: document.getElementById('download-btn'),
fileInput: document.getElementById('file-input'),
uploadBtn: document.getElementById('upload-btn'),
autoplayToggle: document.getElementById('autoplay-toggle'),
formatSelect: document.getElementById('format-select'),
status: document.getElementById('status'),
@@ -67,6 +69,34 @@ export class App {
this.elements.textInput.focus();
});
// Upload button
this.elements.uploadBtn.addEventListener('click', () => {
this.elements.fileInput.click();
});
// File input change
this.elements.fileInput.addEventListener('change', async (event) => {
const file = event.target.files[0];
if (!file) return;
if (file.size > 1024 * 1024) { // 1MB limit
this.showStatus('File too large. Please choose a file under 1MB', 'error');
return;
}
try {
const text = await file.text();
this.elements.textInput.value = text;
this.showStatus('File loaded successfully', 'success');
} catch (error) {
console.error('Error reading file:', error);
this.showStatus('Error reading file', 'error');
}
// Clear the input so the same file can be loaded again
this.elements.fileInput.value = '';
});
// Handle page unload
window.addEventListener('beforeunload', () => {
this.audioService.cleanup();

View file

@@ -50,7 +50,15 @@ export class VoiceService {
}
getSelectedVoiceString() {
return Array.from(this.selectedVoices.entries())
const entries = Array.from(this.selectedVoices.entries());
// If only one voice with weight 1, return just the voice name
if (entries.length === 1 && entries[0][1] === 1) {
return entries[0][0];
}
// Otherwise return voice(weight) format
return entries
.map(([voice, weight]) => `${voice}(${weight})`)
.join('+');
}
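The returned string feeds the voice field of a speech request: a single default-weight selection stays a bare name, while mixes serialize as name(weight) pairs joined by +. A sketch, assuming the API accepts the weighted syntax end to end:
# Single voice => "af_heart"; weighted mix => "af_heart(0.7)+am_adam(0.3)"
curl -s http://localhost:8880/v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{"model": "kokoro", "voice": "af_heart(0.7)+am_adam(0.3)", "input": "Blended voices"}' \
  -o blend.mp3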

View file

@@ -22,7 +22,8 @@ body {
color: var(--text);
background: radial-gradient(circle at top right,
var(--fg-color) 0%,
var(--bg-color) 100%);
var(--bg-color) 80%);
background-attachment: fixed;
min-height: 100vh;
position: relative;
}
@@ -30,7 +31,7 @@ body {
.overlay {
position: fixed;
inset: 0;
background-image:
background-image:
repeating-linear-gradient(0deg,
rgba(255,255,255,0.03) 0px,
rgba(255,255,255,0.03) 1px,
@@ -42,20 +43,23 @@ body {
transparent 1px,
transparent 20px);
pointer-events: none;
z-index: 1;
}
.sun {
position: fixed;
top: 20px;
right: 20px;
width: 100px;
height: 100px;
bottom: 40px;
right: 40px;
width: 80px;
height: 80px;
border-radius: 50%;
background: radial-gradient(circle at center,
rgba(99, 102, 241, 0.2) 0%,
transparent 70%);
background-color: rgba(99, 102, 241, 0.4);
box-shadow:
0 0 40px 15px rgba(213, 99, 241, 0.4),
0 0 80px 25px rgba(99, 102, 241, 0.3),
0 0 120px 35px rgba(91, 53, 228, 0.2);
pointer-events: none;
z-index: 0;
z-index: 9999;
}
.scanline {
@@ -64,7 +68,7 @@ body {
left: 0;
width: 100%;
height: 2px;
background: rgba(99, 102, 241, 0.1);
background: rgba(218, 140, 198, 0.375);
animation: scan 4s linear infinite;
}

View file

@@ -34,6 +34,7 @@ textarea::placeholder {
display: flex;
flex-direction: column;
gap: 1rem;
z-index: 1001; /* Higher than other elements */
}
.voice-search {

View file

@@ -57,4 +57,6 @@ main {
backdrop-filter: blur(12px);
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
0 2px 4px -1px rgba(0, 0, 0, 0.06);
}
#upload-btn {
display: inline-block;
}