Mirror of https://github.com/remsky/Kokoro-FastAPI.git, synced 2025-04-13 09:39:17 +00:00
Add preliminary Docker support for CPU deployment
commit 0652056107 (parent 32a35f9a5a)
3 changed files with 90 additions and 4 deletions
Dockerfile.cpu (new file, 43 lines)
@@ -0,0 +1,43 @@
+FROM ubuntu:22.04
+
+# Install base system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3-pip \
+    python3-dev \
+    espeak-ng \
+    git \
+    libsndfile1 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install PyTorch CPU version
+RUN pip3 install --no-cache-dir torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu
+
+# Install all other dependencies from requirements.txt
+COPY requirements.txt .
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Copy application code and model
+COPY . /app/
+
+# Set working directory
+WORKDIR /app
+
+# Run with Python unbuffered output for live logging
+ENV PYTHONUNBUFFERED=1
+
+# Create non-root user
+RUN useradd -m -u 1000 appuser
+
+# Create directories and set permissions
+RUN mkdir -p /app/Kokoro-82M && \
+    chown -R appuser:appuser /app
+
+# Switch to non-root user
+USER appuser
+
+# Set Python path (app first for our imports, then model dir for model imports)
+ENV PYTHONPATH=/app:/app/Kokoro-82M
+
+# Run FastAPI server with debug logging
+CMD ["uvicorn", "api.src.main:app", "--host", "0.0.0.0", "--port", "8880", "--log-level", "debug"]
README.md (13 changed lines)
@@ -8,7 +8,7 @@
 []()

 FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model, providing an OpenAI-compatible endpoint with:
-- NVIDIA GPU acceleration enabled
+- NVIDIA GPU accelerated inference (or CPU) option
 - automatic chunking/stitching for long texts
 - very fast generation time (~35-49x RTF)

@@ -24,10 +24,15 @@ FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text
 git clone https://github.com/remsky/Kokoro-FastAPI.git
 cd Kokoro-FastAPI

 # Start the API (will automatically clone source HF repo via git-lfs)
+# For GPU acceleration (requires NVIDIA GPU):
 docker compose up --build
+
+# For CPU-only deployment (~10x slower, but doesn't require an NVIDIA GPU):
+docker compose -f docker-compose.cpu.yml up --build
 ```
-
+
+
 Test all voices (from another terminal):
 ```bash
 python examples/test_all_voices.py
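Once the server is up (GPU or CPU variant), the endpoint can also be exercised directly from the host. A minimal sketch, assuming the OpenAI-style /v1/audio/speech route and a voice named af; neither is confirmed by this diff:

```bash
# Request speech from the local server; the route and payload follow the
# OpenAI audio API shape (assumed here, not shown in this commit)
curl -s http://localhost:8880/v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{"model": "kokoro", "input": "Hello from the CPU container.", "voice": "af", "response_format": "mp3"}' \
  -o hello.mp3
```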
@@ -106,11 +111,12 @@ Key Performance Metrics:
 ## Features

 - OpenAI-compatible API endpoints
-- GPU-accelerated inference
+- GPU-accelerated inference (if desired)
 - Multiple audio formats: mp3, wav, opus, flac (aac & pcm not implemented)
 - Natural Boundary Detection:
   - Automatically splits and stitches at sentence boundaries to reduce artifacts and maintain performance

+*Note: CPU inference is currently a very basic implementation, and not heavily tested*

 ## Model

@@ -135,4 +141,3 @@ The full Apache 2.0 license text can be found at: https://www.apache.org/license
 https://user-images.githubusercontent.com/338912d2-90f3-41fb-bca0-5db7b4e02287.mp4

 </div>
-
docker-compose.cpu.yml (new file, 38 lines)
@@ -0,0 +1,38 @@
+services:
+  model-fetcher:
+    image: datamachines/git-lfs:latest
+    volumes:
+      - ./Kokoro-82M:/app/Kokoro-82M
+    working_dir: /app/Kokoro-82M
+    command: >
+      sh -c "
+        if [ -z \"$(ls -A .)\" ]; then
+          git clone https://huggingface.co/hexgrad/Kokoro-82M . && \
+          git checkout 8228a351f87c8a6076502c1e3b7e72e821ebec9a;
+          touch .cloned;
+        else
+          touch .cloned;
+        fi;
+        tail -f /dev/null
+      "
+    healthcheck:
+      test: ["CMD", "test", "-f", ".cloned"]
+      interval: 1s
+      timeout: 1s
+      retries: 120
+      start_period: 1s
+
+  kokoro-tts:
+    build:
+      context: .
+      dockerfile: Dockerfile.cpu
+    volumes:
+      - ./api/src:/app/api/src
+      - ./Kokoro-82M:/app/Kokoro-82M
+    ports:
+      - "8880:8880"
+    environment:
+      - PYTHONPATH=/app:/app/Kokoro-82M
+    depends_on:
+      model-fetcher:
+        condition: service_healthy
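The healthcheck above gates kokoro-tts on the model fetch: the TTS container only starts once the fetcher has touched .cloned. A quick manual sanity check of that sequence, using the service names defined in the file:

```bash
# Bring the CPU stack up in the background
docker compose -f docker-compose.cpu.yml up --build -d

# The fetcher should report "healthy" once .cloned has been created
docker compose -f docker-compose.cpu.yml ps model-fetcher

# Confirm the model repo landed on the host-side bind mount
ls Kokoro-82M | head
```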