diff --git a/api/src/inference/model_manager.py b/api/src/inference/model_manager.py index 9cef95f..5f9c74d 100644 --- a/api/src/inference/model_manager.py +++ b/api/src/inference/model_manager.py @@ -4,17 +4,15 @@ from typing import Optional from loguru import logger -from ..core import paths -from ..core.config import settings -from ..core.model_config import ModelConfig, model_config from .base import BaseModelBackend from .kokoro_v1 import KokoroV1 +from ..core.config import settings +from ..core.model_config import ModelConfig, model_config class ModelManager: """Manages Kokoro V1 model loading and inference.""" - # Singleton instance _instance = None def __init__(self, config: Optional[ModelConfig] = None): @@ -27,76 +25,30 @@ class ModelManager: self._backend: Optional[KokoroV1] = None # Explicitly type as KokoroV1 self._device: Optional[str] = None - def _determine_device(self) -> str: + @staticmethod + def _determine_device() -> str: """Determine device based on settings.""" return "cuda" if settings.use_gpu else "cpu" - async def initialize(self) -> None: + async def initialize(self) -> str: """Initialize Kokoro V1 backend.""" + import time + start = time.perf_counter() + try: self._device = self._determine_device() logger.info(f"Initializing Kokoro V1 on {self._device}") self._backend = KokoroV1() - except Exception as e: - raise RuntimeError(f"Failed to initialize Kokoro V1: {e}") - - async def initialize_with_warmup(self, voice_manager) -> tuple[str, str, int]: - """Initialize and warm up model. 
- - Args: - voice_manager: Voice manager instance for warmup - - Returns: - Tuple of (device, backend type, voice count) - - Raises: - RuntimeError: If initialization fails - """ - import time - - start = time.perf_counter() - - try: - # Initialize backend - await self.initialize() - - # Load model model_path = self._config.pytorch_kokoro_v1_file await self.load_model(model_path) - # Use paths module to get voice path - try: - voices = await paths.list_voices() - voice_path = await paths.get_voice_path(settings.default_voice) - - # Warm up with short text - warmup_text = "Warmup text for initialization." - # Use default voice name for warmup - voice_name = settings.default_voice - logger.debug(f"Using default voice '{voice_name}' for warmup") - async for _ in self.generate(warmup_text, (voice_name, voice_path)): - pass - except Exception as e: - raise RuntimeError(f"Failed to get default voice: {e}") - ms = int((time.perf_counter() - start) * 1000) - logger.info(f"Warmup completed in {ms}ms") - - return self._device, "kokoro_v1", len(voices) - except FileNotFoundError as e: - logger.error(""" -Model files not found! You need to download the Kokoro V1 model: - -1. Download model using the script: - python docker/scripts/download_model.py --output api/src/models/v1_0 - -2. Or set environment variable in docker-compose: - DOWNLOAD_MODEL=true -""") - exit(0) + logger.info(f"Initialized in {ms}ms") + + return self._device except Exception as e: - raise RuntimeError(f"Warmup failed: {e}") + raise RuntimeError(f"Failed to initialize Kokoro V1: {e}") def get_backend(self) -> BaseModelBackend: """Get initialized backend. 
diff --git a/api/src/main.py b/api/src/main.py index 6883c3a..d78d51f 100644 --- a/api/src/main.py +++ b/api/src/main.py @@ -2,10 +2,8 @@ FastAPI OpenAI Compatible API """ -import os import sys from contextlib import asynccontextmanager -from pathlib import Path import torch import uvicorn @@ -55,7 +53,7 @@ async def lifespan(app: FastAPI): try: model_manager = await get_manager() await get_voice_manager() - device = await model_manager.load_voices() + device = await model_manager.initialize() except Exception as e: logger.error(f"Failed to initialize model: {e}") @@ -63,7 +61,6 @@ async def lifespan(app: FastAPI): boundary = "░" * 2 * 12 startup_msg = f""" - {boundary} ╔═╗┌─┐┌─┐┌┬┐ @@ -74,7 +71,8 @@ async def lifespan(app: FastAPI): ╩ ╩└─┘┴ ┴└─┘ {boundary} - """ +""" + startup_msg += f"\nModel loaded on {device}" if device == "mps": startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)" @@ -83,16 +81,11 @@ async def lifespan(app: FastAPI): else: startup_msg += "\nRunning on CPU" - # Add web player info if enabled if settings.enable_web_player: - startup_msg += ( - f"\n\nBeta Web Player: http://{settings.host}:{settings.port}/web/" - ) - startup_msg += f"\nor http://localhost:{settings.port}/web/" + startup_msg += f"\nWeb UI: http://localhost:{settings.port}/web/" else: - startup_msg += "\n\nWeb Player: disabled" + startup_msg += "\n\nWeb UI disabled." - startup_msg += f"\n{boundary}\n" logger.info(startup_msg) yield diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile index f1e9f04..d259d4f 100644 --- a/docker/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -45,14 +45,14 @@ CMD ["./entrypoint.sh"] # If you want to test the docker image locally, run this from the project root: # docker build -f docker\gpu\Dockerfile -t evie/kokorotts . 
# Run it with: -# docker run -p 8880:8880 --name evie/kokorotts evie/kokorotts +# docker run -p 8880:8880 --name kokorotts evie/kokorotts # # You can log into the container with -# docker exec -it evie/kokorotts /bin/bash +# docker exec -it kokorotts /bin/bash # # Other commands: # 1. Stop and remove container -# docker container remove evie/kokorotts +# docker container remove kokorotts # # 2. List or remove images # docker images @@ -61,4 +61,14 @@ CMD ["./entrypoint.sh"] # See docs for pushing to ECR # https://docs.aws.amazon.com/AmazonECR/latest/userguide/getting-started-cli.html # This is my private repo: -# https://eu-west-1.console.aws.amazon.com/ecr/private-registry/repositories?region=eu-west-1 \ No newline at end of file +# https://eu-west-1.console.aws.amazon.com/ecr/private-registry/repositories?region=eu-west-1 +# I can get the docker push / pull commands from there. +# +# SSH keys for logging into EC2 containers: +# https://eu-west-1.console.aws.amazon.com/ec2/home?region=eu-west-1#KeyPairs: +# +# aws ecr get-login-password --region eu-west-1 | docker login --username AWS --password-stdin 678811077621.dkr.ecr.eu-west-1.amazonaws.com +# docker build -f docker\gpu\Dockerfile -t evie/kokorotts . 
+# docker tag evie/kokorotts:latest 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest +# docker push 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest +# docker run --gpus all -p 8880:8880 --name kokorotts 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts \ No newline at end of file diff --git a/start-gpu.ps1 b/start-gpu.ps1 index 7b161a5..52af7fe 100644 --- a/start-gpu.ps1 +++ b/start-gpu.ps1 @@ -1,4 +1,4 @@ -$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll" +# $env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll" $env:PYTHONUTF8=1 $Env:PROJECT_ROOT="$pwd" $Env:USE_GPU="true" @@ -10,4 +10,4 @@ $Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web" uv pip install -e ".[gpu]" uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0 -uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880 \ No newline at end of file +uvicorn api.src.main:app --host 0.0.0.0 --port 8880 \ No newline at end of file