mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-08-05 16:48:53 +00:00
Updated commands for pushing to and running from ECR.
Updated python startup logic for simplicity, considering that I don't do warmup anymore.
This commit is contained in:
parent
7fdfd66992
commit
f18ab7dff7
4 changed files with 33 additions and 78 deletions
|
@ -4,17 +4,15 @@ from typing import Optional
|
|||
|
||||
from loguru import logger
|
||||
|
||||
from ..core import paths
|
||||
from ..core.config import settings
|
||||
from ..core.model_config import ModelConfig, model_config
|
||||
from .base import BaseModelBackend
|
||||
from .kokoro_v1 import KokoroV1
|
||||
from ..core.config import settings
|
||||
from ..core.model_config import ModelConfig, model_config
|
||||
|
||||
|
||||
class ModelManager:
|
||||
"""Manages Kokoro V1 model loading and inference."""
|
||||
|
||||
# Singleton instance
|
||||
_instance = None
|
||||
|
||||
def __init__(self, config: Optional[ModelConfig] = None):
|
||||
|
@ -27,76 +25,30 @@ class ModelManager:
|
|||
self._backend: Optional[KokoroV1] = None # Explicitly type as KokoroV1
|
||||
self._device: Optional[str] = None
|
||||
|
||||
def _determine_device(self) -> str:
|
||||
@staticmethod
|
||||
def _determine_device() -> str:
|
||||
"""Determine device based on settings."""
|
||||
return "cuda" if settings.use_gpu else "cpu"
|
||||
|
||||
async def initialize(self) -> None:
|
||||
async def initialize(self) -> str:
|
||||
"""Initialize Kokoro V1 backend."""
|
||||
import time
|
||||
start = time.perf_counter()
|
||||
|
||||
try:
|
||||
self._device = self._determine_device()
|
||||
logger.info(f"Initializing Kokoro V1 on {self._device}")
|
||||
self._backend = KokoroV1()
|
||||
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to initialize Kokoro V1: {e}")
|
||||
|
||||
async def initialize_with_warmup(self, voice_manager) -> tuple[str, str, int]:
|
||||
"""Initialize and warm up model.
|
||||
|
||||
Args:
|
||||
voice_manager: Voice manager instance for warmup
|
||||
|
||||
Returns:
|
||||
Tuple of (device, backend type, voice count)
|
||||
|
||||
Raises:
|
||||
RuntimeError: If initialization fails
|
||||
"""
|
||||
import time
|
||||
|
||||
start = time.perf_counter()
|
||||
|
||||
try:
|
||||
# Initialize backend
|
||||
await self.initialize()
|
||||
|
||||
# Load model
|
||||
model_path = self._config.pytorch_kokoro_v1_file
|
||||
await self.load_model(model_path)
|
||||
|
||||
# Use paths module to get voice path
|
||||
try:
|
||||
voices = await paths.list_voices()
|
||||
voice_path = await paths.get_voice_path(settings.default_voice)
|
||||
|
||||
# Warm up with short text
|
||||
warmup_text = "Warmup text for initialization."
|
||||
# Use default voice name for warmup
|
||||
voice_name = settings.default_voice
|
||||
logger.debug(f"Using default voice '{voice_name}' for warmup")
|
||||
async for _ in self.generate(warmup_text, (voice_name, voice_path)):
|
||||
pass
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to get default voice: {e}")
|
||||
|
||||
ms = int((time.perf_counter() - start) * 1000)
|
||||
logger.info(f"Warmup completed in {ms}ms")
|
||||
logger.info(f"Initialized in {ms}ms")
|
||||
|
||||
return self._device, "kokoro_v1", len(voices)
|
||||
except FileNotFoundError as e:
|
||||
logger.error("""
|
||||
Model files not found! You need to download the Kokoro V1 model:
|
||||
|
||||
1. Download model using the script:
|
||||
python docker/scripts/download_model.py --output api/src/models/v1_0
|
||||
|
||||
2. Or set environment variable in docker-compose:
|
||||
DOWNLOAD_MODEL=true
|
||||
""")
|
||||
exit(0)
|
||||
return self._device
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Warmup failed: {e}")
|
||||
raise RuntimeError(f"Failed to initialize Kokoro V1: {e}")
|
||||
|
||||
def get_backend(self) -> BaseModelBackend:
|
||||
"""Get initialized backend.
|
||||
|
|
|
@ -2,10 +2,8 @@
|
|||
FastAPI OpenAI Compatible API
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
import uvicorn
|
||||
|
@ -55,7 +53,7 @@ async def lifespan(app: FastAPI):
|
|||
try:
|
||||
model_manager = await get_manager()
|
||||
await get_voice_manager()
|
||||
device = await model_manager.load_voices()
|
||||
device = await model_manager.initialize()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize model: {e}")
|
||||
|
@ -63,7 +61,6 @@ async def lifespan(app: FastAPI):
|
|||
|
||||
boundary = "░" * 2 * 12
|
||||
startup_msg = f"""
|
||||
|
||||
{boundary}
|
||||
|
||||
╔═╗┌─┐┌─┐┌┬┐
|
||||
|
@ -74,7 +71,8 @@ async def lifespan(app: FastAPI):
|
|||
╩ ╩└─┘┴ ┴└─┘
|
||||
|
||||
{boundary}
|
||||
"""
|
||||
"""
|
||||
|
||||
startup_msg += f"\nModel loaded on {device}"
|
||||
if device == "mps":
|
||||
startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
|
||||
|
@ -83,16 +81,11 @@ async def lifespan(app: FastAPI):
|
|||
else:
|
||||
startup_msg += "\nRunning on CPU"
|
||||
|
||||
# Add web player info if enabled
|
||||
if settings.enable_web_player:
|
||||
startup_msg += (
|
||||
f"\n\nBeta Web Player: http://{settings.host}:{settings.port}/web/"
|
||||
)
|
||||
startup_msg += f"\nor http://localhost:{settings.port}/web/"
|
||||
startup_msg += f"\nWeb UI: or http://localhost:{settings.port}/web/"
|
||||
else:
|
||||
startup_msg += "\n\nWeb Player: disabled"
|
||||
startup_msg += "\n\nWeb UI disabled."
|
||||
|
||||
startup_msg += f"\n{boundary}\n"
|
||||
logger.info(startup_msg)
|
||||
|
||||
yield
|
||||
|
|
|
@ -45,14 +45,14 @@ CMD ["./entrypoint.sh"]
|
|||
# If you want to test the docker image locally, run this from the project root:
|
||||
# docker build -f docker\gpu\Dockerfile -t evie/kokorotts .
|
||||
# Run it with:
|
||||
# docker run -p 8880:8880 --name evie/kokorotts evie/kokorotts
|
||||
# docker run -p 8880:8880 --name kokorotts evie/kokorotts
|
||||
#
|
||||
# You can log into the container with
|
||||
# docker exec -it evie/kokorotts /bin/bash
|
||||
# docker exec -it kokorotts /bin/bash
|
||||
#
|
||||
# Other commands:
|
||||
# 1. Stop and remove container
|
||||
# docker container remove evie/kokorotts
|
||||
# docker container remove kokorotts
|
||||
#
|
||||
# 2. List or remove images
|
||||
# docker images
|
||||
|
@ -62,3 +62,13 @@ CMD ["./entrypoint.sh"]
|
|||
# https://docs.aws.amazon.com/AmazonECR/latest/userguide/getting-started-cli.html
|
||||
# This is my private repo:
|
||||
# https://eu-west-1.console.aws.amazon.com/ecr/private-registry/repositories?region=eu-west-1
|
||||
# I can get the docker push / pull commands from there.
|
||||
#
|
||||
# SSH keys for logging into EC2 containers:
|
||||
# https://eu-west-1.console.aws.amazon.com/ec2/home?region=eu-west-1#KeyPairs:
|
||||
#
|
||||
# aws ecr get-login-password --region eu-west-1 | docker login --username AWS --password-stdin 678811077621.dkr.ecr.eu-west-1.amazonaws.com
|
||||
# docker build -f docker\gpu\Dockerfile -t evie/kokorotts .
|
||||
# docker tag evie/kokorotts:latest 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest
|
||||
# docker push 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest
|
||||
# docker run --gpus all -p 8880:8880 --name kokorotts 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts
|
|
@ -1,4 +1,4 @@
|
|||
$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
|
||||
# $env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
|
||||
$env:PYTHONUTF8=1
|
||||
$Env:PROJECT_ROOT="$pwd"
|
||||
$Env:USE_GPU="true"
|
||||
|
@ -10,4 +10,4 @@ $Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web"
|
|||
|
||||
uv pip install -e ".[gpu]"
|
||||
uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
|
||||
uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
|
||||
uvicorn api.src.main:app --host 0.0.0.0 --port 8880
|
Loading…
Add table
Reference in a new issue