Updated commands for pushing to and running from ECR.

Updated Python startup logic for simplicity, since I no longer do warmup.
faltiska 2025-06-09 11:17:13 +03:00
parent 7fdfd66992
commit f18ab7dff7
4 changed files with 33 additions and 78 deletions

View file

@@ -4,17 +4,15 @@ from typing import Optional
from loguru import logger
from ..core import paths
from ..core.config import settings
from ..core.model_config import ModelConfig, model_config
from .base import BaseModelBackend
from .kokoro_v1 import KokoroV1
from ..core.config import settings
from ..core.model_config import ModelConfig, model_config
class ModelManager:
"""Manages Kokoro V1 model loading and inference."""
# Singleton instance
_instance = None
def __init__(self, config: Optional[ModelConfig] = None):
@@ -27,76 +25,30 @@ class ModelManager:
self._backend: Optional[KokoroV1] = None # Explicitly type as KokoroV1
self._device: Optional[str] = None
def _determine_device(self) -> str:
@staticmethod
def _determine_device() -> str:
"""Determine device based on settings."""
return "cuda" if settings.use_gpu else "cpu"
async def initialize(self) -> None:
async def initialize(self) -> str:
"""Initialize Kokoro V1 backend."""
import time
start = time.perf_counter()
try:
self._device = self._determine_device()
logger.info(f"Initializing Kokoro V1 on {self._device}")
self._backend = KokoroV1()
except Exception as e:
raise RuntimeError(f"Failed to initialize Kokoro V1: {e}")
async def initialize_with_warmup(self, voice_manager) -> tuple[str, str, int]:
"""Initialize and warm up model.
Args:
voice_manager: Voice manager instance for warmup
Returns:
Tuple of (device, backend type, voice count)
Raises:
RuntimeError: If initialization fails
"""
import time
start = time.perf_counter()
try:
# Initialize backend
await self.initialize()
# Load model
model_path = self._config.pytorch_kokoro_v1_file
await self.load_model(model_path)
# Use paths module to get voice path
try:
voices = await paths.list_voices()
voice_path = await paths.get_voice_path(settings.default_voice)
# Warm up with short text
warmup_text = "Warmup text for initialization."
# Use default voice name for warmup
voice_name = settings.default_voice
logger.debug(f"Using default voice '{voice_name}' for warmup")
async for _ in self.generate(warmup_text, (voice_name, voice_path)):
pass
except Exception as e:
raise RuntimeError(f"Failed to get default voice: {e}")
ms = int((time.perf_counter() - start) * 1000)
logger.info(f"Warmup completed in {ms}ms")
logger.info(f"Initialized in {ms}ms")
return self._device, "kokoro_v1", len(voices)
except FileNotFoundError as e:
logger.error("""
Model files not found! You need to download the Kokoro V1 model:
1. Download model using the script:
python docker/scripts/download_model.py --output api/src/models/v1_0
2. Or set environment variable in docker-compose:
DOWNLOAD_MODEL=true
""")
exit(0)
return self._device
except Exception as e:
raise RuntimeError(f"Warmup failed: {e}")
raise RuntimeError(f"Failed to initialize Kokoro V1: {e}")
def get_backend(self) -> BaseModelBackend:
"""Get initialized backend.

View file

@@ -2,10 +2,8 @@
FastAPI OpenAI Compatible API
"""
import os
import sys
from contextlib import asynccontextmanager
from pathlib import Path
import torch
import uvicorn
@@ -55,7 +53,7 @@ async def lifespan(app: FastAPI):
try:
model_manager = await get_manager()
await get_voice_manager()
device = await model_manager.load_voices()
device = await model_manager.initialize()
except Exception as e:
logger.error(f"Failed to initialize model: {e}")
@@ -63,7 +61,6 @@ async def lifespan(app: FastAPI):
boundary = "" * 2 * 12
startup_msg = f"""
{boundary}
@@ -74,7 +71,8 @@ async def lifespan(app: FastAPI):
{boundary}
"""
"""
startup_msg += f"\nModel loaded on {device}"
if device == "mps":
startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
@@ -83,16 +81,11 @@ async def lifespan(app: FastAPI):
else:
startup_msg += "\nRunning on CPU"
# Add web player info if enabled
if settings.enable_web_player:
startup_msg += (
f"\n\nBeta Web Player: http://{settings.host}:{settings.port}/web/"
)
startup_msg += f"\nor http://localhost:{settings.port}/web/"
startup_msg += f"\nWeb UI: or http://localhost:{settings.port}/web/"
else:
startup_msg += "\n\nWeb Player: disabled"
startup_msg += "\n\nWeb UI disabled."
startup_msg += f"\n{boundary}\n"
logger.info(startup_msg)
yield
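For reference, the trimmed startup path in lifespan presumably ends up close to the sketch below. It covers only the pieces visible in the hunks above; the FastAPI wiring, the behavior after the error log, and the CUDA branch of the device message are assumptions.

# Sketch only: simplified lifespan reconstructed from the main.py hunks above.
# get_manager, get_voice_manager, and settings come from the app's own modules
# (their imports are elided in the hunks).
from contextlib import asynccontextmanager

from fastapi import FastAPI
from loguru import logger


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the model at startup; there is no warmup pass anymore."""
    try:
        model_manager = await get_manager()
        await get_voice_manager()
        device = await model_manager.initialize()  # "cuda" or "cpu"
    except Exception as e:
        logger.error(f"Failed to initialize model: {e}")
        raise  # assumption: the hunk does not show what happens after the log

    startup_msg = f"\nModel loaded on {device}"
    if device == "mps":
        startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
    elif device == "cuda":
        startup_msg += "\nRunning on GPU (CUDA)"  # assumed; branch elided in the hunk
    else:
        startup_msg += "\nRunning on CPU"

    if settings.enable_web_player:
        startup_msg += f"\nWeb UI: http://localhost:{settings.port}/web/"
    else:
        startup_msg += "\n\nWeb UI disabled."

    logger.info(startup_msg)
    yield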

View file

@@ -45,14 +45,14 @@ CMD ["./entrypoint.sh"]
# If you want to test the docker image locally, run this from the project root:
# docker build -f docker\gpu\Dockerfile -t evie/kokorotts .
# Run it with:
# docker run -p 8880:8880 --name evie/kokorotts evie/kokorotts
# docker run -p 8880:8880 --name kokorotts evie/kokorotts
#
# You can log into the container with
# docker exec -it evie/kokorotts /bin/bash
# docker exec -it kokorotts /bin/bash
#
# Other commands:
# 1. Stop and remove container
# docker container remove evie/kokorotts
# docker container remove kokorotts
#
# 2. List or remove images
# docker images
@@ -62,3 +62,13 @@ CMD ["./entrypoint.sh"]
# https://docs.aws.amazon.com/AmazonECR/latest/userguide/getting-started-cli.html
# This is my private repo:
# https://eu-west-1.console.aws.amazon.com/ecr/private-registry/repositories?region=eu-west-1
# I can get the docker push / pull commands from there.
#
# SSH keys for logging into EC2 containers:
# https://eu-west-1.console.aws.amazon.com/ec2/home?region=eu-west-1#KeyPairs:
#
# aws ecr get-login-password --region eu-west-1 | docker login --username AWS --password-stdin 678811077621.dkr.ecr.eu-west-1.amazonaws.com
# docker build -f docker\gpu\Dockerfile -t evie/kokorotts .
# docker tag evie/kokorotts:latest 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest
# docker push 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest
# docker run --gpus all -p 8880:8880 --name kokorotts 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts
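If the push sequence needs to be repeatable, the same commands can be scripted. Below is a minimal sketch in Python that simply shells out to the commands listed above, using the registry, region, and image name from those comments; error handling beyond check=True is left out, and it assumes aws and docker are on the PATH.

# Sketch only: scripts the ECR login/build/tag/push commands from the comments above.
import subprocess

REGION = "eu-west-1"
REGISTRY = "678811077621.dkr.ecr.eu-west-1.amazonaws.com"
IMAGE = "evie/kokorotts"


def run(args, **kwargs):
    """Run a command and raise if it exits non-zero."""
    return subprocess.run(args, check=True, **kwargs)


# Log in to ECR by piping the password into `docker login --password-stdin`.
password = run(
    ["aws", "ecr", "get-login-password", "--region", REGION],
    capture_output=True, text=True,
).stdout
run(
    ["docker", "login", "--username", "AWS", "--password-stdin", REGISTRY],
    input=password, text=True,
)

# Build, tag, and push the image to the private repository.
run(["docker", "build", "-f", "docker/gpu/Dockerfile", "-t", IMAGE, "."])
run(["docker", "tag", f"{IMAGE}:latest", f"{REGISTRY}/{IMAGE}:latest"])
run(["docker", "push", f"{REGISTRY}/{IMAGE}:latest"])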

View file

@@ -1,4 +1,4 @@
$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
# $env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
$env:PYTHONUTF8=1
$Env:PROJECT_ROOT="$pwd"
$Env:USE_GPU="true"
@@ -10,4 +10,4 @@ $Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web"
uv pip install -e ".[gpu]"
uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
uvicorn api.src.main:app --host 0.0.0.0 --port 8880