Updated commands for pushing to and running from ECR.

Updated Python startup logic for simplicity, since I no longer do warmup.
faltiska 2025-06-09 11:17:13 +03:00
parent 7fdfd66992
commit f18ab7dff7
4 changed files with 33 additions and 78 deletions

View file

@@ -4,17 +4,15 @@ from typing import Optional
from loguru import logger
from ..core import paths
from ..core.config import settings
from ..core.model_config import ModelConfig, model_config
from .base import BaseModelBackend
from .kokoro_v1 import KokoroV1
from ..core.config import settings
from ..core.model_config import ModelConfig, model_config
class ModelManager:
"""Manages Kokoro V1 model loading and inference."""
# Singleton instance
_instance = None
def __init__(self, config: Optional[ModelConfig] = None):
@@ -27,76 +25,30 @@ class ModelManager:
self._backend: Optional[KokoroV1] = None # Explicitly type as KokoroV1
self._device: Optional[str] = None
def _determine_device(self) -> str:
@staticmethod
def _determine_device() -> str:
"""Determine device based on settings."""
return "cuda" if settings.use_gpu else "cpu"
async def initialize(self) -> None:
async def initialize(self) -> str:
"""Initialize Kokoro V1 backend."""
import time
start = time.perf_counter()
try:
self._device = self._determine_device()
logger.info(f"Initializing Kokoro V1 on {self._device}")
self._backend = KokoroV1()
except Exception as e:
raise RuntimeError(f"Failed to initialize Kokoro V1: {e}")
async def initialize_with_warmup(self, voice_manager) -> tuple[str, str, int]:
"""Initialize and warm up model.
Args:
voice_manager: Voice manager instance for warmup
Returns:
Tuple of (device, backend type, voice count)
Raises:
RuntimeError: If initialization fails
"""
import time
start = time.perf_counter()
try:
# Initialize backend
await self.initialize()
# Load model
model_path = self._config.pytorch_kokoro_v1_file
await self.load_model(model_path)
# Use paths module to get voice path
try:
voices = await paths.list_voices()
voice_path = await paths.get_voice_path(settings.default_voice)
# Warm up with short text
warmup_text = "Warmup text for initialization."
# Use default voice name for warmup
voice_name = settings.default_voice
logger.debug(f"Using default voice '{voice_name}' for warmup")
async for _ in self.generate(warmup_text, (voice_name, voice_path)):
pass
except Exception as e:
raise RuntimeError(f"Failed to get default voice: {e}")
ms = int((time.perf_counter() - start) * 1000)
logger.info(f"Warmup completed in {ms}ms")
logger.info(f"Initialized in {ms}ms")
return self._device, "kokoro_v1", len(voices)
except FileNotFoundError as e:
logger.error("""
Model files not found! You need to download the Kokoro V1 model:
1. Download model using the script:
python docker/scripts/download_model.py --output api/src/models/v1_0
2. Or set environment variable in docker-compose:
DOWNLOAD_MODEL=true
""")
exit(0)
return self._device
except Exception as e:
raise RuntimeError(f"Warmup failed: {e}")
raise RuntimeError(f"Failed to initialize Kokoro V1: {e}")
def get_backend(self) -> BaseModelBackend:
"""Get initialized backend.

View file

@@ -2,10 +2,8 @@
FastAPI OpenAI Compatible API
"""
import os
import sys
from contextlib import asynccontextmanager
from pathlib import Path
import torch
import uvicorn
@@ -55,7 +53,7 @@ async def lifespan(app: FastAPI):
try:
model_manager = await get_manager()
await get_voice_manager()
device = await model_manager.load_voices()
device = await model_manager.initialize()
except Exception as e:
logger.error(f"Failed to initialize model: {e}")
@@ -63,7 +61,6 @@ async def lifespan(app: FastAPI):
boundary = "" * 2 * 12
startup_msg = f"""
{boundary}
@@ -74,7 +71,8 @@ async def lifespan(app: FastAPI):
{boundary}
"""
"""
startup_msg += f"\nModel loaded on {device}"
if device == "mps":
startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
@@ -83,16 +81,11 @@ async def lifespan(app: FastAPI):
else:
startup_msg += "\nRunning on CPU"
# Add web player info if enabled
if settings.enable_web_player:
startup_msg += (
f"\n\nBeta Web Player: http://{settings.host}:{settings.port}/web/"
)
startup_msg += f"\nor http://localhost:{settings.port}/web/"
startup_msg += f"\nWeb UI: or http://localhost:{settings.port}/web/"
else:
startup_msg += "\n\nWeb Player: disabled"
startup_msg += "\n\nWeb UI disabled."
startup_msg += f"\n{boundary}\n"
logger.info(startup_msg)
yield
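For reference, the trimmed startup path in lifespan presumably ends up close to the sketch below. It covers only the pieces visible in the hunks above; the FastAPI wiring, the behavior after the error log, and the CUDA branch of the device message are assumptions.

# Sketch only: simplified lifespan reconstructed from the main.py hunks above.
# get_manager, get_voice_manager, and settings come from the app's own modules
# (their imports are elided in the hunks).
from contextlib import asynccontextmanager

from fastapi import FastAPI
from loguru import logger


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the model at startup; there is no warmup pass anymore."""
    try:
        model_manager = await get_manager()
        await get_voice_manager()
        device = await model_manager.initialize()  # "cuda" or "cpu"
    except Exception as e:
        logger.error(f"Failed to initialize model: {e}")
        raise  # assumption: the hunk does not show what happens after the log

    startup_msg = f"\nModel loaded on {device}"
    if device == "mps":
        startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
    elif device == "cuda":
        startup_msg += "\nRunning on GPU (CUDA)"  # assumed; branch elided in the hunk
    else:
        startup_msg += "\nRunning on CPU"

    if settings.enable_web_player:
        startup_msg += f"\nWeb UI: http://localhost:{settings.port}/web/"
    else:
        startup_msg += "\n\nWeb UI disabled."

    logger.info(startup_msg)
    yield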

View file

@@ -45,14 +45,14 @@ CMD ["./entrypoint.sh"]
# If you want to test the docker image locally, run this from the project root:
# docker build -f docker\gpu\Dockerfile -t evie/kokorotts .
# Run it with:
# docker run -p 8880:8880 --name evie/kokorotts evie/kokorotts
# docker run -p 8880:8880 --name kokorotts evie/kokorotts
#
# You can log into the container with
# docker exec -it evie/kokorotts /bin/bash
# docker exec -it kokorotts /bin/bash
#
# Other commands:
# 1. Stop and remove container
# docker container remove evie/kokorotts
# docker container remove kokorotts
#
# 2. List or remove images
# docker images
@@ -62,3 +62,13 @@ CMD ["./entrypoint.sh"]
# https://docs.aws.amazon.com/AmazonECR/latest/userguide/getting-started-cli.html
# This is my private repo:
# https://eu-west-1.console.aws.amazon.com/ecr/private-registry/repositories?region=eu-west-1
# I can get the docker push / pull commands from there.
#
# SSH keys for logging into EC2 containers:
# https://eu-west-1.console.aws.amazon.com/ec2/home?region=eu-west-1#KeyPairs:
#
# aws ecr get-login-password --region eu-west-1 | docker login --username AWS --password-stdin 678811077621.dkr.ecr.eu-west-1.amazonaws.com
# docker build -f docker\gpu\Dockerfile -t evie/kokorotts .
# docker tag evie/kokorotts:latest 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest
# docker push 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts:latest
# docker run --gpus all -p 8880:8880 --name kokorotts 678811077621.dkr.ecr.eu-west-1.amazonaws.com/evie/kokorotts
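If the push sequence needs to be repeatable, the same commands can be scripted. Below is a minimal sketch in Python that simply shells out to the commands listed above, using the registry, region, and image name from those comments; error handling beyond check=True is left out, and it assumes aws and docker are on the PATH.

# Sketch only: scripts the ECR login/build/tag/push commands from the comments above.
import subprocess

REGION = "eu-west-1"
REGISTRY = "678811077621.dkr.ecr.eu-west-1.amazonaws.com"
IMAGE = "evie/kokorotts"


def run(args, **kwargs):
    """Run a command and raise if it exits non-zero."""
    return subprocess.run(args, check=True, **kwargs)


# Log in to ECR by piping the password into `docker login --password-stdin`.
password = run(
    ["aws", "ecr", "get-login-password", "--region", REGION],
    capture_output=True, text=True,
).stdout
run(
    ["docker", "login", "--username", "AWS", "--password-stdin", REGISTRY],
    input=password, text=True,
)

# Build, tag, and push the image to the private repository.
run(["docker", "build", "-f", "docker/gpu/Dockerfile", "-t", IMAGE, "."])
run(["docker", "tag", f"{IMAGE}:latest", f"{REGISTRY}/{IMAGE}:latest"])
run(["docker", "push", f"{REGISTRY}/{IMAGE}:latest"])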

View file

@@ -1,4 +1,4 @@
$env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
# $env:PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"
$env:PYTHONUTF8=1
$Env:PROJECT_ROOT="$pwd"
$Env:USE_GPU="true"
@@ -10,4 +10,4 @@ $Env:WEB_PLAYER_PATH="$Env:PROJECT_ROOT/web"
uv pip install -e ".[gpu]"
uv run --no-sync python docker/scripts/download_model.py --output api/src/models/v1_0
uv run --no-sync uvicorn api.src.main:app --host 0.0.0.0 --port 8880
uvicorn api.src.main:app --host 0.0.0.0 --port 8880