Updated commands for pushing to and running from ECR.

Updated python startup logic for simplicity, considering that I don't do warmup anymore.
This commit is contained in:
faltiska 2025-06-09 11:17:13 +03:00
parent 7fdfd66992
commit f18ab7dff7
4 changed files with 33 additions and 78 deletions

View file

@ -4,17 +4,15 @@ from typing import Optional
from loguru import logger from loguru import logger
from ..core import paths
from ..core.config import settings
from ..core.model_config import ModelConfig, model_config
from .base import BaseModelBackend from .base import BaseModelBackend
from .kokoro_v1 import KokoroV1 from .kokoro_v1 import KokoroV1
from ..core.config import settings
from ..core.model_config import ModelConfig, model_config
class ModelManager: class ModelManager:
"""Manages Kokoro V1 model loading and inference.""" """Manages Kokoro V1 model loading and inference."""
# Singleton instance
_instance = None _instance = None
def __init__(self, config: Optional[ModelConfig] = None): def __init__(self, config: Optional[ModelConfig] = None):
@ -27,76 +25,30 @@ class ModelManager:
self._backend: Optional[KokoroV1] = None # Explicitly type as KokoroV1 self._backend: Optional[KokoroV1] = None # Explicitly type as KokoroV1
self._device: Optional[str] = None self._device: Optional[str] = None
def _determine_device(self) -> str: @staticmethod
def _determine_device() -> str:
"""Determine device based on settings.""" """Determine device based on settings."""
return "cuda" if settings.use_gpu else "cpu" return "cuda" if settings.use_gpu else "cpu"
async def initialize(self) -> None: async def initialize(self) -> str:
"""Initialize Kokoro V1 backend.""" """Initialize Kokoro V1 backend."""
import time
start = time.perf_counter()
try: try:
self._device = self._determine_device() self._device = self._determine_device()
logger.info(f"Initializing Kokoro V1 on {self._device}") logger.info(f"Initializing Kokoro V1 on {self._device}")
self._backend = KokoroV1() self._backend = KokoroV1()
except Exception as e:
raise RuntimeError(f"Failed to initialize Kokoro V1: {e}")
async def initialize_with_warmup(self, voice_manager) -> tuple[str, str, int]:
"""Initialize and warm up model.
Args:
voice_manager: Voice manager instance for warmup
Returns:
Tuple of (device, backend type, voice count)
Raises:
RuntimeError: If initialization fails
"""
import time
start = time.perf_counter()
try:
# Initialize backend
await self.initialize()
# Load model
model_path = self._config.pytorch_kokoro_v1_file model_path = self._config.pytorch_kokoro_v1_file
await self.load_model(model_path) await self.load_model(model_path)
# Use paths module to get voice path
try:
voices = await paths.list_voices()
voice_path = await paths.get_voice_path(settings.default_voice)
# Warm up with short text
warmup_text = "Warmup text for initialization."
# Use default voice name for warmup
voice_name = settings.default_voice
logger.debug(f"Using default voice '{voice_name}' for warmup")
async for _ in self.generate(warmup_text, (voice_name, voice_path)):
pass
except Exception as e:
raise RuntimeError(f"Failed to get default voice: {e}")
ms = int((time.perf_counter() - start) * 1000) ms = int((time.perf_counter() - start) * 1000)
logger.info(f"Warmup completed in {ms}ms") logger.info(f"Initialized in {ms}ms")
return self._device, "kokoro_v1", len(voices) return self._device
except FileNotFoundError as e:
logger.error("""
Model files not found! You need to download the Kokoro V1 model:
1. Download model using the script:
python docker/scripts/download_model.py --output api/src/models/v1_0
2. Or set environment variable in docker-compose:
DOWNLOAD_MODEL=true
""")
exit(0)
except Exception as e: except Exception as e:
raise RuntimeError(f"Warmup failed: {e}") raise RuntimeError(f"Failed to initialize Kokoro V1: {e}")
def get_backend(self) -> BaseModelBackend: def get_backend(self) -> BaseModelBackend:
"""Get initialized backend. """Get initialized backend.

View file

@ -2,10 +2,8 @@
FastAPI OpenAI Compatible API FastAPI OpenAI Compatible API
""" """
import os
import sys import sys
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from pathlib import Path
import torch import torch
import uvicorn import uvicorn
@ -55,7 +53,7 @@ async def lifespan(app: FastAPI):
try: try:
model_manager = await get_manager() model_manager = await get_manager()
await get_voice_manager() await get_voice_manager()
device = await model_manager.load_voices() device = await model_manager.initialize()
except Exception as e: except Exception as e:
logger.error(f"Failed to initialize model: {e}") logger.error(f"Failed to initialize model: {e}")
@ -63,7 +61,6 @@ async def lifespan(app: FastAPI):
boundary = "" * 2 * 12 boundary = "" * 2 * 12
startup_msg = f""" startup_msg = f"""
{boundary} {boundary}
@ -74,7 +71,8 @@ async def lifespan(app: FastAPI):
{boundary} {boundary}
""" """
startup_msg += f"\nModel loaded on {device}" startup_msg += f"\nModel loaded on {device}"
if device == "mps": if device == "mps":
startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)" startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
@ -83,16 +81,11 @@ async def lifespan(app: FastAPI):
else: else:
startup_msg += "\nRunning on CPU" startup_msg += "\nRunning on CPU"
# Add web player info if enabled
if settings.enable_web_player: if settings.enable_web_player:
startup_msg += ( startup_msg += f"\nWeb UI: or http://localhost:{settings.port}/web/"
f"\n\nBeta Web Player: http://{settings.host}:{settings.port}/web/"
)
startup_msg += f"\nor http://localhost:{settings.port}/web/"
else: else:
startup_msg += "\n\nWeb Player: disabled" startup_msg += "\n\nWeb UI disabled."
startup_msg += f"\n{boundary}\n"
logger.info(startup_msg) logger.info(startup_msg)
yield yield

View file

@ -45,14 +45,14 @@ CMD ["./entrypoint.sh"]
# If you want to test the docker image locally, run this from the project root: # If you want to test the docker image locally, run this from the project root:
# docker build -f docker\gpu\Dockerfile -t evie/kokorotts . # docker build -f docker\gpu\Dockerfile -t evie/kokorotts .
# Run it with: # Run it with:
# docker run -p 8880:8880 --name evie/kokorotts evie/kokorotts # docker run -p 8880:8880 --name kokorotts evie/kokorotts
# #
# You can log into the container with # You can log into the container with
# docker exec -it evie/kokorotts /bin/bash # docker exec -it kokorotts /bin/bash
# #
# Other commands: # Other commands:
# 1. Stop and remove container # 1. Stop and remove container
# docker container remove evie/kokorotts # docker container remove kokorotts
# #
# 2. List or remove images # 2. List or remove images
# docker images # docker images
@ -61,4 +61,14 @@ CMD ["./entrypoint.sh"]
# See docs for pushing to ECR # See docs for pushing to ECR
# https://docs.aws.amazon.com/AmazonECR/latest/userguide/getting-started-cli.html # https://docs.aws.amazon.com/AmazonECR/latest/userguide/getting-started-cli.html
# This is my private repo: # This is my private repo:
# https://eu-west-1.console.aws.amazon.com/ecr/private-registry/repositories?region=eu-west-1 # https://eu-west-1.console.aws.amazon.com/ecr/private-registry/repositories?region=eu-west-1
# I can get the docker push / pull commands from there.
#
# SSH keys for logging into EC2 containers:
# https://eu-west-1.console.aws.amazon.com/ec2/home?region=eu-west-1#KeyPairs:
#
# aws ecr get-login-password --region eu-west-1 | docker login --username AWS --password-stdin 678811077621.dkr.ecr.eu-west-1.amazonaws.com
# docker build -f docker\gpu\Dockerfile -t evie/kokorotts .
# docker tag evie/kokorotts:latest 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest
# docker push 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest
# docker run --gpus all -p 8880:8880 --name kokorotts 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts

View file

@ -1,4 +1,4 @@
$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll" # $env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
$env:PYTHONUTF8=1 $env:PYTHONUTF8=1
$Env:PROJECT_ROOT="$pwd" $Env:PROJECT_ROOT="$pwd"
$Env:USE_GPU="true" $Env:USE_GPU="true"
@ -10,4 +10,4 @@ $Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web"
uv pip install -e ".[gpu]" uv pip install -e ".[gpu]"
uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0 uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880 uvicorn api.src.main:app --host 0.0.0.0 --port 8880