mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
Add notes about performance inside docker compose
This commit is contained in:
parent
e9d9921612
commit
19ac239aeb
2 changed files with 31 additions and 137 deletions
|
@ -1,95 +0,0 @@
|
||||||
# UV Setup
|
|
||||||
Deprecated notes for myself
|
|
||||||
## Structure
|
|
||||||
```
|
|
||||||
docker/
|
|
||||||
├── cpu/
|
|
||||||
│ ├── pyproject.toml # CPU deps (torch CPU)
|
|
||||||
│ └── requirements.lock # CPU lockfile
|
|
||||||
├── gpu/
|
|
||||||
│ ├── pyproject.toml # GPU deps (torch CUDA)
|
|
||||||
│ └── requirements.lock # GPU lockfile
|
|
||||||
├── rocm/
|
|
||||||
│ ├── pyproject.toml # ROCM deps (torch ROCM)
|
|
||||||
│ └── requirements.lock # ROCM lockfile
|
|
||||||
└── shared/
|
|
||||||
└── pyproject.toml # Common deps
|
|
||||||
```
|
|
||||||
|
|
||||||
## Regenerate Lock Files
|
|
||||||
|
|
||||||
### CPU
|
|
||||||
```bash
|
|
||||||
cd docker/cpu
|
|
||||||
uv pip compile pyproject.toml ../shared/pyproject.toml --output-file requirements.lock
|
|
||||||
```
|
|
||||||
|
|
||||||
### GPU
|
|
||||||
```bash
|
|
||||||
cd docker/gpu
|
|
||||||
uv pip compile pyproject.toml ../shared/pyproject.toml --output-file requirements.lock
|
|
||||||
```
|
|
||||||
|
|
||||||
### ROCM
|
|
||||||
```bash
|
|
||||||
cd docker/rocm
|
|
||||||
uv pip compile pyproject.toml ../shared/pyproject.toml --output-file requirements.lock
|
|
||||||
```
|
|
||||||
|
|
||||||
## Local Dev Setup
|
|
||||||
|
|
||||||
### CPU
|
|
||||||
```bash
|
|
||||||
cd docker/cpu
|
|
||||||
uv venv
|
|
||||||
.venv\Scripts\activate # Windows
|
|
||||||
uv pip sync requirements.lock
|
|
||||||
```
|
|
||||||
|
|
||||||
### GPU
|
|
||||||
```bash
|
|
||||||
cd docker/gpu
|
|
||||||
uv venv
|
|
||||||
.venv\Scripts\activate # Windows
|
|
||||||
uv pip sync requirements.lock --extra-index-url https://download.pytorch.org/whl/cu121 --index-strategy unsafe-best-match
|
|
||||||
```
|
|
||||||
|
|
||||||
### ROCM
|
|
||||||
```bash
|
|
||||||
cd docker/rocm
|
|
||||||
uv venv
|
|
||||||
source .venv/bin/activate
|
|
||||||
# not tested on Windows
|
|
||||||
#.venv\Scripts\activate # Windows
|
|
||||||
uv pip sync requirements.lock --extra-index-url https://download.pytorch.org/whl/rocm6.2
|
|
||||||
```
|
|
||||||
|
|
||||||
### Run Server
|
|
||||||
```bash
|
|
||||||
# From project root with venv active:
|
|
||||||
uvicorn api.src.main:app --reload
|
|
||||||
```
|
|
||||||
|
|
||||||
## Docker
|
|
||||||
|
|
||||||
### CPU
|
|
||||||
```bash
|
|
||||||
cd docker/cpu
|
|
||||||
docker compose up
|
|
||||||
```
|
|
||||||
|
|
||||||
### GPU
|
|
||||||
```bash
|
|
||||||
cd docker/gpu
|
|
||||||
docker compose up
|
|
||||||
```
|
|
||||||
|
|
||||||
### ROCM
|
|
||||||
```bash
|
|
||||||
cd docker/rocm
|
|
||||||
docker compose up
|
|
||||||
```
|
|
||||||
|
|
||||||
## Known Issues
|
|
||||||
- Module imports: Run server from project root
|
|
||||||
- PyTorch CUDA: Always use --extra-index-url and --index-strategy for GPU env
|
|
|
@ -1,44 +1,33 @@
|
||||||
name: kokoro-tts
|
|
||||||
services:
|
services:
|
||||||
kokoro-tts:
|
kokoro-tts:
|
||||||
# image: ghcr.io/remsky/kokoro-fastapi-rocm:v0.1.0
|
image: kprinssu/kokoro-fastapi:rocm
|
||||||
build:
|
devices:
|
||||||
context: ../..
|
- /dev/dri
|
||||||
dockerfile: docker/rocm/Dockerfile
|
- /dev/kfd
|
||||||
volumes:
|
security_opt:
|
||||||
- ../../api/src:/app/api/src # Mount src for development
|
- seccomp:unconfined
|
||||||
- ../../api/src/voices:/app/api/src/voices # Mount voices for persistence
|
cap_add:
|
||||||
ports:
|
- SYS_PTRACE
|
||||||
- "8880:8880"
|
restart: 'always'
|
||||||
environment:
|
volumes:
|
||||||
- PYTHONPATH=/app:/app/models
|
- ./kokoro-tts/config:/root/.config/miopen
|
||||||
- TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
|
- ./kokoro-tts/cache:/root/.cache/miopen
|
||||||
# This suppresses excessive warning logs. Probably not a good idea to suppress, but no other solution found
|
ports:
|
||||||
# (see https://github.com/ROCm/MIOpen/issues/2981)
|
- 8880:8880
|
||||||
- MIOPEN_LOG_LEVEL=3
|
environment:
|
||||||
devices:
|
- USE_GPU=true
|
||||||
- /dev/kfd:/dev/kfd
|
- TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1
|
||||||
- /dev/dri:/dev/dri
|
# IMPORTANT: ROCm's MIOpen library will be slow if it has to figure out the optimal kernel shapes for each model.
|
||||||
security_opt:
|
# See documentation on performance tuning: https://github.com/ROCm/MIOpen/blob/develop/docs/conceptual/tuningdb.rst
|
||||||
- seccomp=unconfined
|
# The volumes above cache the MIOpen shape files and user database for subsequent runs
|
||||||
group_add:
|
#
|
||||||
- video
|
# Steps:
|
||||||
ipc: host
|
# 1. Run Kokoro once with the following environment variables set:
|
||||||
|
# - MIOPEN_ENABLE_LOGGING=1
|
||||||
# Gradio UI service
|
# - MIOPEN_ENABLE_LOGGING_CMD=1
|
||||||
gradio-ui:
|
# - MIOPEN_LOG_LEVEL=6
|
||||||
image: ghcr.io/remsky/kokoro-fastapi-ui:v0.1.0
|
# 2. Generate various recordings using sample data (e.g. first couple paragraphs of Dracula); this will be slow
|
||||||
# Uncomment below to build from source instead of using the released image
|
# 3. Comment out the previously set environment variables
|
||||||
# build:
|
# 4. Add the following environment variables to enable caching of model shapes:
|
||||||
# context: ../../ui
|
# - MIOPEN_ENABLE_LOGGING=0
# - MIOPEN_FIND_MODE=2
|
||||||
ports:
|
# 5. Restart the container and run Kokoro again, it should be much faster
|
||||||
- "7860:7860"
|
|
||||||
volumes:
|
|
||||||
- ../../ui/data:/app/ui/data
|
|
||||||
- ../../ui/app.py:/app/app.py # Mount app.py for hot reload
|
|
||||||
environment:
|
|
||||||
- GRADIO_WATCH=1 # Enable hot reloading
|
|
||||||
- PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
|
|
||||||
- DISABLE_LOCAL_SAVING=false # Set to 'true' to disable local saving and hide file view
|
|
||||||
- API_HOST=kokoro-tts # Set TTS service URL
|
|
||||||
- API_PORT=8880 # Set TTS service PORT
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue