# Docker Compose stack for Kokoro TTS.
#   model-fetcher: clones or updates the Kokoro-82M model repo into ./Kokoro-82M
#   kokoro-tts:    TTS API on port 8880; waits for model-fetcher to be healthy
#   gradio-ui:     optional web UI on port 7860; waits for kokoro-tts to be healthy
services:
  model-fetcher:
    image: datamachines/git-lfs:latest
    volumes:
      - ./Kokoro-82M:/app/Kokoro-82M
    working_dir: /app/Kokoro-82M
    command:
      - sh
      - "-c"
      # "$$" is Compose's escape for a literal "$", so the command
      # substitution below is evaluated by the shell inside the container
      # instead of being treated as Compose variable interpolation.
      - |
        mkdir -p /app/Kokoro-82M
        cd /app/Kokoro-82M

        # The marker lives in the bind-mounted directory and survives
        # restarts. Drop it so the healthcheck only passes after THIS run
        # has finished cloning/updating.
        rm -f .cloned
        # Clear a lock left behind by an interrupted git operation.
        rm -f .git/index.lock

        if [ -z "$$(ls -A .)" ]; then
          # Empty directory: fresh clone. Only mark ready when it succeeded.
          git clone https://huggingface.co/hexgrad/Kokoro-82M . &&
            touch .cloned
        else
          # Existing content: try to update it. A failed update (for example
          # when offline) is reported but does not block startup, because
          # the files from the previous run are still in place.
          if git checkout main && git pull origin main; then
            echo 'Kokoro-82M updated.'
          else
            echo 'WARNING: could not update Kokoro-82M; continuing with the existing files.' >&2
          fi
          touch .cloned
        fi

        # Keep the container running so its healthcheck can be evaluated.
        tail -f /dev/null
    healthcheck:
      # Healthy once the marker written by the command above exists.
      test: ["CMD", "test", "-f", "/app/Kokoro-82M/.cloned"]
      interval: 5s
      timeout: 2s
      retries: 300
      start_period: 1s

  kokoro-tts:
    # Built from source by default. To run the released image instead,
    # comment out the `build` section and uncomment the `image` line.
    # image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    build:
      context: .
      dockerfile: Dockerfile.cpu
    volumes:
      - ./api/src:/app/api/src
      - ./Kokoro-82M:/app/Kokoro-82M
    ports:
      - "8880:8880"
    environment:
      - PYTHONPATH=/app:/app/Kokoro-82M
      # ONNX optimization settings for vectorized operations
      - ONNX_NUM_THREADS=8  # Maximize core usage for vectorized ops
      - ONNX_INTER_OP_THREADS=4  # Higher inter-op for parallel matrix operations
      - ONNX_EXECUTION_MODE=parallel
      - ONNX_OPTIMIZATION_LEVEL=all
      - ONNX_MEMORY_PATTERN=true
      - ONNX_ARENA_EXTEND_STRATEGY=kNextPowerOfTwo
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8880/health"]
      interval: 10s
      timeout: 5s
      retries: 30
      start_period: 30s
    depends_on:
      # Do not start until the model files have been fetched.
      model-fetcher:
        condition: service_healthy

  # Gradio UI service [comment out everything below if you don't need it]
  gradio-ui:
    image: ghcr.io/remsky/kokoro-fastapi-ui:latest
    # To build from source instead of using the released image, comment out
    # the `image` line above and uncomment the `build` section below.
    # build:
    #   context: ./ui
    ports:
      - "7860:7860"
    volumes:
      - ./ui/data:/app/ui/data
      - ./ui/app.py:/app/app.py  # Mount app.py for hot reload
    environment:
      - GRADIO_WATCH=True  # Enable hot reloading
      - PYTHONUNBUFFERED=1  # Ensure Python output is not buffered
    depends_on:
      kokoro-tts:
        condition: service_healthy