mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
Add preliminary Docker support for CPU deployment
This commit is contained in:
parent
32a35f9a5a
commit
0652056107
3 changed files with 90 additions and 4 deletions
43
Dockerfile.cpu
Normal file
43
Dockerfile.cpu
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
FROM ubuntu:22.04

# Install base system dependencies.
# DEBIAN_FRONTEND=noninteractive prevents apt packages (e.g. tzdata) from
# blocking the build with interactive prompts; scoped to this RUN so it does
# not leak into the final image environment.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    python3-pip \
    python3-dev \
    espeak-ng \
    git \
    libsndfile1 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install PyTorch CPU version in its own layer so this heavy download is
# cached independently of the application requirements.
RUN pip3 install --no-cache-dir torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu

# Set working directory BEFORE any COPY so relative destinations land in
# /app instead of the image root.
WORKDIR /app

# Install all other dependencies from requirements.txt
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy application code and model
COPY . /app/

# Run with Python unbuffered output for live logging
ENV PYTHONUNBUFFERED=1

# Create non-root user
RUN useradd -m -u 1000 appuser

# Create model mount point and hand the app tree to the non-root user
RUN mkdir -p /app/Kokoro-82M && \
    chown -R appuser:appuser /app

# Switch to non-root user
USER appuser

# Set Python path (app first for our imports, then model dir for model imports)
ENV PYTHONPATH=/app:/app/Kokoro-82M

# Run FastAPI server with debug logging
CMD ["uvicorn", "api.src.main:app", "--host", "0.0.0.0", "--port", "8880", "--log-level", "debug"]
|
13
README.md
13
README.md
|
@ -8,7 +8,7 @@
|
||||||
[]()
|
[]()
|
||||||
|
|
||||||
FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model, providing an OpenAI-compatible endpoint with:
|
FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model, providing an OpenAI-compatible endpoint with:
|
||||||
- NVIDIA GPU acceleration enabled
|
- NVIDIA GPU accelerated inference (or CPU) option
|
||||||
- automatic chunking/stitching for long texts
|
- automatic chunking/stitching for long texts
|
||||||
- very fast generation time (~35-49x RTF)
|
- very fast generation time (~35-49x RTF)
|
||||||
|
|
||||||
|
@ -24,10 +24,15 @@ FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text
|
||||||
git clone https://github.com/remsky/Kokoro-FastAPI.git
|
git clone https://github.com/remsky/Kokoro-FastAPI.git
|
||||||
cd Kokoro-FastAPI
|
cd Kokoro-FastAPI
|
||||||
|
|
||||||
# Start the API (will automatically clone source HF repo via git-lfs)
|
# For GPU acceleration (requires NVIDIA GPU):
|
||||||
docker compose up --build
|
docker compose up --build
|
||||||
|
|
||||||
|
# For CPU-only deployment (~10x slower, but doesn't require an NVIDIA GPU):
|
||||||
|
docker compose -f docker-compose.cpu.yml up --build
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Test all voices (from another terminal):
|
Test all voices (from another terminal):
|
||||||
```bash
|
```bash
|
||||||
python examples/test_all_voices.py
|
python examples/test_all_voices.py
|
||||||
|
@ -106,11 +111,12 @@ Key Performance Metrics:
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- OpenAI-compatible API endpoints
|
- OpenAI-compatible API endpoints
|
||||||
- GPU-accelerated inference
|
- GPU-accelerated inference (if desired)
|
||||||
- Multiple audio formats: mp3, wav, opus, flac, (aac & pcm not implemented)
|
- Multiple audio formats: mp3, wav, opus, flac, (aac & pcm not implemented)
|
||||||
- Natural Boundary Detection:
|
- Natural Boundary Detection:
|
||||||
- Automatically splits and stitches at sentence boundaries to reduce artifacts and maintain performance
|
- Automatically splits and stitches at sentence boundaries to reduce artifacts and maintain performance
|
||||||
|
|
||||||
|
*Note: CPU Inference is currently a very basic implementation, and not heavily tested*
|
||||||
|
|
||||||
## Model
|
## Model
|
||||||
|
|
||||||
|
@ -135,4 +141,3 @@ The full Apache 2.0 license text can be found at: https://www.apache.org/license
|
||||||
https://user-images.githubusercontent.com/338912d2-90f3-41fb-bca0-5db7b4e02287.mp4
|
https://user-images.githubusercontent.com/338912d2-90f3-41fb-bca0-5db7b4e02287.mp4
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
38
docker-compose.cpu.yml
Normal file
38
docker-compose.cpu.yml
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
services:
  # One-shot fetcher: clones the Kokoro-82M model into the shared volume,
  # then drops a .cloned marker the healthcheck watches for.
  model-fetcher:
    image: datamachines/git-lfs:latest
    volumes:
      - ./Kokoro-82M:/app/Kokoro-82M
    working_dir: /app/Kokoro-82M
    # NOTE: $$ escapes Compose variable interpolation so the container shell
    # sees a literal $(ls -A .). A literal block scalar (|) is used instead of
    # a folded one: folding a line that ends in "\" produced the broken token
    # "\ git" in the original; here the shell itself continues after "&&".
    command: |
      sh -c '
        if [ -z "$$(ls -A .)" ]; then
          git clone https://huggingface.co/hexgrad/Kokoro-82M . &&
          git checkout 8228a351f87c8a6076502c1e3b7e72e821ebec9a
        fi
        touch .cloned
        tail -f /dev/null
      '
    healthcheck:
      # Marker file lives in working_dir (/app/Kokoro-82M)
      test: ["CMD", "test", "-f", ".cloned"]
      interval: 1s
      timeout: 1s
      retries: 120
      start_period: 1s

  kokoro-tts:
    build:
      context: .
      dockerfile: Dockerfile.cpu
    volumes:
      # Live-mount source for reload during development, plus the model dir
      - ./api/src:/app/api/src
      - ./Kokoro-82M:/app/Kokoro-82M
    ports:
      # Quoted to avoid YAML's sexagesimal-number trap on port mappings
      - "8880:8880"
    environment:
      - PYTHONPATH=/app:/app/Kokoro-82M
    depends_on:
      # Only start the API once the model has been fetched
      model-fetcher:
        condition: service_healthy
|
Loading…
Add table
Reference in a new issue