# Kokoro-FastAPI/docker-compose.cpu.yml

services:
  # Helper service that clones (or updates) the Kokoro-82M model repository
  # into the bind-mounted ./Kokoro-82M directory, then idles so that its
  # healthcheck can gate the services that depend on it.
  model-fetcher:
    image: datamachines/git-lfs:latest
    volumes:
      - ./Kokoro-82M:/app/Kokoro-82M
    working_dir: /app/Kokoro-82M
    # The script lines are indented deeper than `sh -c "`, so the folded
    # scalar keeps their line breaks and each line is a separate shell command.
    # `$$` is Compose's escape for a literal `$`: it keeps the command
    # substitution away from Compose variable interpolation.
    # The `.cloned` marker is only written when the git step before it
    # succeeded, so a failed clone/pull never reports the service as healthy
    # on a fresh directory.
    command: >
      sh -c "
        mkdir -p /app/Kokoro-82M;
        cd /app/Kokoro-82M;
        rm -f .git/index.lock;
        if [ -z \"$$(ls -A .)\" ]; then
          git clone https://huggingface.co/hexgrad/Kokoro-82M . && \
          touch .cloned;
        else
          git checkout main && \
          git pull origin main && \
          touch .cloned;
        fi;
        tail -f /dev/null
      "
    # Healthy once the marker file written by the command above exists.
    # NOTE(review): the marker lives in the bind mount, so one left over from a
    # previous run makes this check pass before the pull has finished - confirm
    # whether that is acceptable.
    healthcheck:
      test: ["CMD", "test", "-f", ".cloned"]
      interval: 5s
      timeout: 2s
      retries: 300
      start_period: 1s

  # Kokoro TTS service (CPU build); published on port 8880.
  kokoro-tts:
    # Built from source by default. To run the released image instead,
    # uncomment the `image` line and comment out the `build` section.
    # image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    build:
      context: .
      dockerfile: Dockerfile.cpu
    # Start only after model-fetcher reports healthy.
    depends_on:
      model-fetcher:
        condition: service_healthy
    ports:
      - "8880:8880"
    # Bind-mount the local API source tree and the model directory.
    volumes:
      - ./api/src:/app/api/src
      - ./Kokoro-82M:/app/Kokoro-82M
    environment:
      PYTHONPATH: "/app:/app/Kokoro-82M"
      # ONNX optimization settings for vectorized operations
      ONNX_NUM_THREADS: "8"  # Maximize core usage for vectorized ops
      ONNX_INTER_OP_THREADS: "4"  # Higher inter-op for parallel matrix operations
      ONNX_EXECUTION_MODE: "parallel"
      ONNX_OPTIMIZATION_LEVEL: "all"
      ONNX_MEMORY_PATTERN: "true"
      ONNX_ARENA_EXTEND_STRATEGY: "kNextPowerOfTwo"
    # Probe the /health endpoint with curl.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8880/health"
      start_period: 30s
      interval: 10s
      timeout: 5s
      retries: 30

  # Gradio UI service [comment out this entire service if you don't need it]
  gradio-ui:
    # Uses the released image by default. To build from source instead,
    # comment out `image` and uncomment the `build` section below.
    image: ghcr.io/remsky/kokoro-fastapi-ui:latest
    # build:
    #   context: ./ui
    # Start only after kokoro-tts reports healthy.
    depends_on:
      kokoro-tts:
        condition: service_healthy
    ports:
      - "7860:7860"
    volumes:
      - ./ui/data:/app/ui/data
      - ./ui/app.py:/app/app.py  # Mount app.py for hot reload
    environment:
      GRADIO_WATCH: "True"  # Enable hot reloading
      PYTHONUNBUFFERED: "1"  # Ensure Python output is not buffered