Add preliminary Docker support for CPU deployment

This commit is contained in:
remsky 2024-12-31 04:51:21 -07:00
parent 32a35f9a5a
commit 0652056107
3 changed files with 90 additions and 4 deletions

43
Dockerfile.cpu Normal file
View file

@ -0,0 +1,43 @@
FROM ubuntu:22.04

# Install base system dependencies (espeak-ng and libsndfile1 are runtime
# requirements of the TTS pipeline; git is needed for model fetching).
# Update + install + cleanup happen in one layer so apt lists never persist.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive \
    apt-get install -y --no-install-recommends \
    espeak-ng \
    git \
    libsndfile1 \
    python3-dev \
    python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install the PyTorch CPU build first: it is by far the largest dependency
# and changes least often, so it gets its own cache-friendly layer.
RUN pip3 install --no-cache-dir torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu

# Set the working directory before any COPY so relative destinations are
# rooted at /app (previously requirements.txt was copied into /).
WORKDIR /app

# Copy only the dependency manifest first so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Create the non-root runtime user before copying the app so ownership can
# be assigned at COPY time instead of via a chown layer afterwards.
RUN useradd -m -u 1000 appuser

# Copy application code owned by the runtime user (COPY --chown avoids a
# follow-up `RUN chown -R` that would duplicate every file in a new layer).
COPY --chown=appuser:appuser . /app/

# Model directory; populated at runtime (e.g. via the compose bind mount).
RUN mkdir -p /app/Kokoro-82M && chown appuser:appuser /app/Kokoro-82M

# Switch to non-root user for everything that runs in the container
USER appuser

# Run with Python unbuffered output for live logging
ENV PYTHONUNBUFFERED=1
# Set Python path (app first for our imports, then model dir for model imports)
ENV PYTHONPATH=/app:/app/Kokoro-82M

# Documentation only: the API listens on 8880 (published via compose)
EXPOSE 8880

# Run FastAPI server with debug logging
CMD ["uvicorn", "api.src.main:app", "--host", "0.0.0.0", "--port", "8880", "--log-level", "debug"]

View file

@ -8,7 +8,7 @@
[![Coverage](https://img.shields.io/badge/coverage-97%25-darkgreen)]() [![Coverage](https://img.shields.io/badge/coverage-97%25-darkgreen)]()
FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model, providing an OpenAI-compatible endpoint with: FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model, providing an OpenAI-compatible endpoint with:
- NVIDIA GPU acceleration enabled - NVIDIA GPU accelerated inference (or CPU) option
- automatic chunking/stitching for long texts - automatic chunking/stitching for long texts
- very fast generation time (~35-49x RTF) - very fast generation time (~35-49x RTF)
@ -24,10 +24,15 @@ FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text
git clone https://github.com/remsky/Kokoro-FastAPI.git git clone https://github.com/remsky/Kokoro-FastAPI.git
cd Kokoro-FastAPI cd Kokoro-FastAPI
# Start the API (will automatically clone source HF repo via git-lfs) # For GPU acceleration (requires NVIDIA GPU):
docker compose up --build docker compose up --build
# For CPU-only deployment (~10x slower, but doesn't require an NVIDIA GPU):
docker compose -f docker-compose.cpu.yml up --build
``` ```
Test all voices (from another terminal): Test all voices (from another terminal):
```bash ```bash
python examples/test_all_voices.py python examples/test_all_voices.py
@ -106,11 +111,12 @@ Key Performance Metrics:
## Features ## Features
- OpenAI-compatible API endpoints - OpenAI-compatible API endpoints
- GPU-accelerated inference - GPU-accelerated inference (if desired)
- Multiple audio formats: mp3, wav, opus, flac, (aac & pcm not implemented) - Multiple audio formats: mp3, wav, opus, flac, (aac & pcm not implemented)
- Natural Boundary Detection: - Natural Boundary Detection:
- Automatically splits and stitches at sentence boundaries to reduce artifacts and maintain performance - Automatically splits and stitches at sentence boundaries to reduce artifacts and maintain performance
*Note: CPU Inference is currently a very basic implementation, and not heavily tested*
## Model ## Model
@ -135,4 +141,3 @@ The full Apache 2.0 license text can be found at: https://www.apache.org/license
https://user-images.githubusercontent.com/338912d2-90f3-41fb-bca0-5db7b4e02287.mp4 https://user-images.githubusercontent.com/338912d2-90f3-41fb-bca0-5db7b4e02287.mp4
</div> </div>

38
docker-compose.cpu.yml Normal file
View file

@ -0,0 +1,38 @@
services:
  # One-shot fetcher: clones the pinned Kokoro-82M snapshot into a shared
  # bind mount, then idles so its healthcheck can gate the TTS service.
  model-fetcher:
    image: datamachines/git-lfs:latest
    volumes:
      - ./Kokoro-82M:/app/Kokoro-82M
    working_dir: /app/Kokoro-82M
    # NOTE: "$$" escapes "$" for docker compose interpolation so the command
    # substitution runs inside the container shell. ".cloned" is only created
    # after a *successful* clone + checkout (previously "touch" ran even on
    # failure, marking the service healthy with an empty model directory).
    command: >
      sh -c "
      if [ -z \"$$(ls -A .)\" ]; then
      git clone https://huggingface.co/hexgrad/Kokoro-82M . &&
      git checkout 8228a351f87c8a6076502c1e3b7e72e821ebec9a &&
      touch .cloned;
      else
      touch .cloned;
      fi;
      tail -f /dev/null
      "
    healthcheck:
      # Relative path resolves against working_dir (/app/Kokoro-82M)
      test: ["CMD", "test", "-f", ".cloned"]
      interval: 1s
      timeout: 1s
      retries: 120
      start_period: 1s

  kokoro-tts:
    build:
      context: .
      dockerfile: Dockerfile.cpu
    volumes:
      # Live-mount source for development; model dir shared with the fetcher
      - ./api/src:/app/api/src
      - ./Kokoro-82M:/app/Kokoro-82M
    ports:
      - "8880:8880"
    environment:
      - PYTHONPATH=/app:/app/Kokoro-82M
    depends_on:
      # Wait for the model to be present before the API starts
      model-fetcher:
        condition: service_healthy