mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
Add notes about performance inside docker compose
This commit is contained in:
parent
e9d9921612
commit
19ac239aeb
2 changed files with 31 additions and 137 deletions
|
@ -1,95 +0,0 @@
|
|||
# UV Setup
|
||||
Deprecated notes for myself
|
||||
## Structure
|
||||
```
|
||||
docker/
|
||||
├── cpu/
|
||||
│ ├── pyproject.toml # CPU deps (torch CPU)
|
||||
│ └── requirements.lock # CPU lockfile
|
||||
├── gpu/
|
||||
│ ├── pyproject.toml # GPU deps (torch CUDA)
|
||||
│ └── requirements.lock # GPU lockfile
|
||||
├── rocm/
|
||||
│ ├── pyproject.toml # ROCM deps (torch ROCM)
|
||||
│ └── requirements.lock # ROCM lockfile
|
||||
└── shared/
|
||||
└── pyproject.toml # Common deps
|
||||
```
|
||||
|
||||
## Regenerate Lock Files
|
||||
|
||||
### CPU
|
||||
```bash
|
||||
cd docker/cpu
|
||||
uv pip compile pyproject.toml ../shared/pyproject.toml --output-file requirements.lock
|
||||
```
|
||||
|
||||
### GPU
|
||||
```bash
|
||||
cd docker/gpu
|
||||
uv pip compile pyproject.toml ../shared/pyproject.toml --output-file requirements.lock
|
||||
```
|
||||
|
||||
### ROCM
|
||||
```bash
|
||||
cd docker/rocm
|
||||
uv pip compile pyproject.toml ../shared/pyproject.toml --output-file requirements.lock
|
||||
```
|
||||
|
||||
## Local Dev Setup
|
||||
|
||||
### CPU
|
||||
```bash
|
||||
cd docker/cpu
|
||||
uv venv
|
||||
.venv\Scripts\activate # Windows
|
||||
uv pip sync requirements.lock
|
||||
```
|
||||
|
||||
### GPU
|
||||
```bash
|
||||
cd docker/gpu
|
||||
uv venv
|
||||
.venv\Scripts\activate # Windows
|
||||
uv pip sync requirements.lock --extra-index-url https://download.pytorch.org/whl/cu121 --index-strategy unsafe-best-match
|
||||
```
|
||||
|
||||
### ROCM
|
||||
```bash
|
||||
cd docker/rocm
|
||||
uv venv
|
||||
source .venv/bin/activate
|
||||
# not tested on Windows
|
||||
#.venv\Scripts\activate # Windows
|
||||
uv pip sync requirements.lock --extra-index-url https://download.pytorch.org/whl/rocm6.2
|
||||
```
|
||||
|
||||
### Run Server
|
||||
```bash
|
||||
# From project root with venv active:
|
||||
uvicorn api.src.main:app --reload
|
||||
```
|
||||
|
||||
## Docker
|
||||
|
||||
### CPU
|
||||
```bash
|
||||
cd docker/cpu
|
||||
docker compose up
|
||||
```
|
||||
|
||||
### GPU
|
||||
```bash
|
||||
cd docker/gpu
|
||||
docker compose up
|
||||
```
|
||||
|
||||
### ROCM
|
||||
```bash
|
||||
cd docker/rocm
|
||||
docker compose up
|
||||
```
|
||||
|
||||
## Known Issues
|
||||
- Module imports: Run server from project root
|
||||
- PyTorch CUDA: Always use --extra-index-url and --index-strategy for GPU env
|
|
@ -1,44 +1,33 @@
|
|||
name: kokoro-tts
|
||||
services:
|
||||
kokoro-tts:
|
||||
# image: ghcr.io/remsky/kokoro-fastapi-rocm:v0.1.0
|
||||
build:
|
||||
context: ../..
|
||||
dockerfile: docker/rocm/Dockerfile
|
||||
volumes:
|
||||
- ../../api/src:/app/api/src # Mount src for development
|
||||
- ../../api/src/voices:/app/api/src/voices # Mount voices for persistence
|
||||
ports:
|
||||
- "8880:8880"
|
||||
environment:
|
||||
- PYTHONPATH=/app:/app/models
|
||||
- TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
|
||||
# This suppresses excessive warning logs. Probably not a good idea to suppress, but no other solution found
|
||||
# (see https://github.com/ROCm/MIOpen/issues/2981)
|
||||
- MIOPEN_LOG_LEVEL=3
|
||||
image: kprinssu/kokoro-fastapi:rocm
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
- /dev/dri
|
||||
- /dev/kfd
|
||||
security_opt:
|
||||
- seccomp=unconfined
|
||||
group_add:
|
||||
- video
|
||||
ipc: host
|
||||
|
||||
# Gradio UI service
|
||||
gradio-ui:
|
||||
image: ghcr.io/remsky/kokoro-fastapi-ui:v0.1.0
|
||||
# Uncomment below to build from source instead of using the released image
|
||||
# build:
|
||||
# context: ../../ui
|
||||
ports:
|
||||
- "7860:7860"
|
||||
- seccomp:unconfined
|
||||
cap_add:
|
||||
- SYS_PTRACE
|
||||
restart: 'always'
|
||||
volumes:
|
||||
- ../../ui/data:/app/ui/data
|
||||
- ../../ui/app.py:/app/app.py # Mount app.py for hot reload
|
||||
- ./kokoro-tts/config:/root/.config/miopen
|
||||
- ./kokoro-tts/cache:/root/.cache/miopen
|
||||
ports:
|
||||
- 8880:8880
|
||||
environment:
|
||||
- GRADIO_WATCH=1 # Enable hot reloading
|
||||
- PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
|
||||
- DISABLE_LOCAL_SAVING=false # Set to 'true' to disable local saving and hide file view
|
||||
- API_HOST=kokoro-tts # Set TTS service URL
|
||||
- API_PORT=8880 # Set TTS service PORT
|
||||
- USE_GPU=true
|
||||
- TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
|
||||
# IMPORTANT: ROCm's MIOpen library will be slow if it has to figure out the optimal kernel shapes for each model
|
||||
# See documentation on performance tuning: https://github.com/ROCm/MIOpen/blob/develop/docs/conceptual/tuningdb.rst
|
||||
# The volumes above cache the MIOpen shape files and user database for subsequent runs
|
||||
#
|
||||
# Steps:
|
||||
# 1. Run Kokoro once with the following environment variables set:
|
||||
# - MIOPEN_ENABLE_LOGGING=1
|
||||
# - MIOPEN_ENABLE_LOGGING_CMD=1
|
||||
# - MIOPEN_LOG_LEVEL=6
|
||||
# 2. Generate various recordings using sample data (e.g. first couple paragraphs of Dracula); this will be slow
|
||||
# 3. Comment out the previously set environment variables
|
||||
# 4. Add the following environment variables to enable caching of model shapes:
|
||||
# - MIOPEN_ENABLE_LOGGING=0
# - MIOPEN_FIND_MODE=2
|
||||
# 5. Restart the container and run Kokoro again, it should be much faster
|
||||
|
|
Loading…
Add table
Reference in a new issue