Updated commands for pushing to and running from ECR.

Updated python startup logic for simplicity, considering that I don't do warmup anymore.
This commit is contained in:
faltiska 2025-06-09 11:17:13 +03:00
parent 7fdfd66992
commit f18ab7dff7
4 changed files with 33 additions and 78 deletions

View file

@ -4,17 +4,15 @@ from typing import Optional
from loguru import logger from loguru import logger
from ..core import paths
from ..core.config import settings
from ..core.model_config import ModelConfig, model_config
from .base import BaseModelBackend from .base import BaseModelBackend
from .kokoro_v1 import KokoroV1 from .kokoro_v1 import KokoroV1
from ..core.config import settings
from ..core.model_config import ModelConfig, model_config
class ModelManager: class ModelManager:
"""Manages Kokoro V1 model loading and inference.""" """Manages Kokoro V1 model loading and inference."""
# Singleton instance
_instance = None _instance = None
def __init__(self, config: Optional[ModelConfig] = None): def __init__(self, config: Optional[ModelConfig] = None):
@ -27,76 +25,30 @@ class ModelManager:
self._backend: Optional[KokoroV1] = None # Explicitly type as KokoroV1 self._backend: Optional[KokoroV1] = None # Explicitly type as KokoroV1
self._device: Optional[str] = None self._device: Optional[str] = None
def _determine_device(self) -> str: @staticmethod
def _determine_device() -> str:
"""Determine device based on settings.""" """Determine device based on settings."""
return "cuda" if settings.use_gpu else "cpu" return "cuda" if settings.use_gpu else "cpu"
async def initialize(self) -> None: async def initialize(self) -> str:
"""Initialize Kokoro V1 backend.""" """Initialize Kokoro V1 backend."""
import time
start = time.perf_counter()
try: try:
self._device = self._determine_device() self._device = self._determine_device()
logger.info(f"Initializing Kokoro V1 on {self._device}") logger.info(f"Initializing Kokoro V1 on {self._device}")
self._backend = KokoroV1() self._backend = KokoroV1()
except Exception as e:
raise RuntimeError(f"Failed to initialize Kokoro V1: {e}")
async def initialize_with_warmup(self, voice_manager) -> tuple[str, str, int]:
"""Initialize and warm up model.
Args:
voice_manager: Voice manager instance for warmup
Returns:
Tuple of (device, backend type, voice count)
Raises:
RuntimeError: If initialization fails
"""
import time
start = time.perf_counter()
try:
# Initialize backend
await self.initialize()
# Load model
model_path = self._config.pytorch_kokoro_v1_file model_path = self._config.pytorch_kokoro_v1_file
await self.load_model(model_path) await self.load_model(model_path)
# Use paths module to get voice path
try:
voices = await paths.list_voices()
voice_path = await paths.get_voice_path(settings.default_voice)
# Warm up with short text
warmup_text = "Warmup text for initialization."
# Use default voice name for warmup
voice_name = settings.default_voice
logger.debug(f"Using default voice '{voice_name}' for warmup")
async for _ in self.generate(warmup_text, (voice_name, voice_path)):
pass
except Exception as e:
raise RuntimeError(f"Failed to get default voice: {e}")
ms = int((time.perf_counter() - start) * 1000) ms = int((time.perf_counter() - start) * 1000)
logger.info(f"Warmup completed in {ms}ms") logger.info(f"Initialized in {ms}ms")
return self._device, "kokoro_v1", len(voices) return self._device
except FileNotFoundError as e:
logger.error("""
Model files not found! You need to download the Kokoro V1 model:
1. Download model using the script:
python docker/scripts/download_model.py --output api/src/models/v1_0
2. Or set environment variable in docker-compose:
DOWNLOAD_MODEL=true
""")
exit(0)
except Exception as e: except Exception as e:
raise RuntimeError(f"Warmup failed: {e}") raise RuntimeError(f"Failed to initialize Kokoro V1: {e}")
def get_backend(self) -> BaseModelBackend: def get_backend(self) -> BaseModelBackend:
"""Get initialized backend. """Get initialized backend.

View file

@ -2,10 +2,8 @@
FastAPI OpenAI Compatible API FastAPI OpenAI Compatible API
""" """
import os
import sys import sys
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from pathlib import Path
import torch import torch
import uvicorn import uvicorn
@ -55,7 +53,7 @@ async def lifespan(app: FastAPI):
try: try:
model_manager = await get_manager() model_manager = await get_manager()
await get_voice_manager() await get_voice_manager()
device = await model_manager.load_voices() device = await model_manager.initialize()
except Exception as e: except Exception as e:
logger.error(f"Failed to initialize model: {e}") logger.error(f"Failed to initialize model: {e}")
@ -63,7 +61,6 @@ async def lifespan(app: FastAPI):
boundary = "" * 2 * 12 boundary = "" * 2 * 12
startup_msg = f""" startup_msg = f"""
{boundary} {boundary}
@ -74,7 +71,8 @@ async def lifespan(app: FastAPI):
{boundary} {boundary}
""" """
startup_msg += f"\nModel loaded on {device}" startup_msg += f"\nModel loaded on {device}"
if device == "mps": if device == "mps":
startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)" startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
@ -83,16 +81,11 @@ async def lifespan(app: FastAPI):
else: else:
startup_msg += "\nRunning on CPU" startup_msg += "\nRunning on CPU"
# Add web player info if enabled
if settings.enable_web_player: if settings.enable_web_player:
startup_msg += ( startup_msg += f"\nWeb UI: or http://localhost:{settings.port}/web/"
f"\n\nBeta Web Player: http://{settings.host}:{settings.port}/web/"
)
startup_msg += f"\nor http://localhost:{settings.port}/web/"
else: else:
startup_msg += "\n\nWeb Player: disabled" startup_msg += "\n\nWeb UI disabled."
startup_msg += f"\n{boundary}\n"
logger.info(startup_msg) logger.info(startup_msg)
yield yield

View file

@ -45,14 +45,14 @@ CMD ["./entrypoint.sh"]
# If you want to test the docker image locally, run this from the project root: # If you want to test the docker image locally, run this from the project root:
# docker build -f docker\gpu\Dockerfile -t evie/kokorotts . # docker build -f docker\gpu\Dockerfile -t evie/kokorotts .
# Run it with: # Run it with:
# docker run -p 8880:8880 --name evie/kokorotts evie/kokorotts # docker run -p 8880:8880 --name kokorotts evie/kokorotts
# #
# You can log into the container with # You can log into the container with
# docker exec -it evie/kokorotts /bin/bash # docker exec -it kokorotts /bin/bash
# #
# Other commands: # Other commands:
# 1. Stop and remove container # 1. Stop and remove container
# docker container remove evie/kokorotts # docker container remove kokorotts
# #
# 2. List or remove images # 2. List or remove images
# docker images # docker images
@ -61,4 +61,14 @@ CMD ["./entrypoint.sh"]
# See docs for pushing to ECR # See docs for pushing to ECR
# https://docs.aws.amazon.com/AmazonECR/latest/userguide/getting-started-cli.html # https://docs.aws.amazon.com/AmazonECR/latest/userguide/getting-started-cli.html
# This is my private repo: # This is my private repo:
# https://eu-west-1.console.aws.amazon.com/ecr/private-registry/repositories?region=eu-west-1 # https://eu-west-1.console.aws.amazon.com/ecr/private-registry/repositories?region=eu-west-1
# I can get the docker push / pull commands from there.
#
# SSH keys for logging into EC2 containers:
# https://eu-west-1.console.aws.amazon.com/ec2/home?region=eu-west-1#KeyPairs:
#
# aws ecr get-login-password --region eu-west-1 | docker login --username AWS --password-stdin 678811077621.dkr.ecr.eu-west-1.amazonaws.com
# docker build -f docker\gpu\Dockerfile -t evie/kokorotts .
# docker tag evie/kokorotts:latest 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest
# docker push 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest
# docker run --gpus all -p 8880:8880 --name kokorotts 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts

View file

@ -1,4 +1,4 @@
$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll" # $env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
$env:PYTHONUTF8=1 $env:PYTHONUTF8=1
$Env:PROJECT_ROOT="$pwd" $Env:PROJECT_ROOT="$pwd"
$Env:USE_GPU="true" $Env:USE_GPU="true"
@ -10,4 +10,4 @@ $Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web"
uv pip install -e ".[gpu]" uv pip install -e ".[gpu]"
uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0 uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880 uvicorn api.src.main:app --host 0.0.0.0 --port 8880