mirror of https://github.com/remsky/Kokoro-FastAPI.git (synced 2025-04-13 09:39:17 +00:00)

feat: merge master into core/uv-management for v0.1.0

Major changes:
- Baked model directly into Dockerfile for improved deployment
- Switched to uv for dependency management
- Restructured Docker files into docker/cpu and docker/gpu directories
- Updated configuration for better ONNX performance

commit 007b1a35e8
8 changed files with 292 additions and 190 deletions
@@ -1,6 +1,5 @@
 # Version control
 .git
-.gitignore
 
 # Python
 __pycache__
.github/workflows/sync-develop.yml (vendored, 92 lines changed)
@@ -1,55 +1,55 @@
Every line of the workflow is commented out by this change, disabling the automatic sync. The workflow it disables:

name: Sync develop with master

on:
  push:
    branches:
      - master

jobs:
  sync-develop:
    runs-on: ubuntu-latest
    permissions:
      contents: write
      issues: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: develop

      - name: Configure Git
        run: |
          git config user.name "GitHub Actions"
          git config user.email "actions@github.com"

      - name: Merge master into develop
        run: |
          git fetch origin master:master
          git merge --no-ff origin/master -m "chore: Merge master into develop branch"

      - name: Push changes
        run: |
          if ! git push origin develop; then
            echo "Failed to push to develop branch"
            exit 1
          fi

      - name: Handle Failure
        if: failure()
        uses: actions/github-script@v7
        with:
          script: |
            const issueBody = `Automatic merge from master to develop failed.

            Please resolve this manually

            Workflow run: ${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`;

            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: '🔄 Automatic master to develop merge failed',
              body: issueBody,
              labels: ['merge-failed', 'automation']
            });
.gitignore (vendored, 81 lines changed)
@@ -1,33 +1,44 @@
-output/*
-output_audio/*
-ui/data/*
-
-*.db
+# Version control
+.git
+
+# Python
+__pycache__/
+
 *.pyc
+*.pyo
+*.pyd
+*.py[cod]
+*$py.class
+.Python
+.pytest_cache
+.coverage
+.coveragerc
+
+# Python package build artifacts
+*.egg-info/
+*.egg
+dist/
+build/
+
+# Environment
+# .env
+.venv/
+env/
+venv/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Project specific
+# Model files
+*.pt
 *.pth
 *.tar*
 
-Kokoro-82M/*
-__pycache__/
-.vscode/
-env/
-.venv/
-.Python
-
-.coverage
-
-examples/assorted_checks/benchmarks/output_audio/*
-examples/assorted_checks/test_combinations/output/*
-examples/assorted_checks/test_openai/output/*
-
-examples/assorted_checks/test_voices/output/*
-examples/assorted_checks/test_formats/output/*
-examples/assorted_checks/benchmarks/output_audio_stream/*
-ui/RepoScreenshot.png
-examples/assorted_checks/benchmarks/output_audio_stream_openai/*
-
+# Voice files
 api/src/voices/af_bella.pt
 api/src/voices/af_nicole.pt
 api/src/voices/af_sarah.pt
@@ -39,14 +50,24 @@ api/src/voices/bf_emma.pt
 api/src/voices/bf_isabella.pt
 api/src/voices/bm_george.pt
 api/src/voices/bm_lewis.pt
 
+# Audio files
+examples/*.wav
+examples/*.pcm
+examples/*.mp3
+examples/*.flac
+examples/*.acc
+examples/*.ogg
 examples/speech.mp3
 examples/phoneme_examples/output/example_1.wav
 examples/phoneme_examples/output/example_2.wav
 examples/phoneme_examples/output/example_3.wav
 
+# Other project files
+Kokoro-82M/
+ui/data/
 EXTERNAL_UV_DOCUMENTATION*
 
-# Python package build artifacts
-*.egg-info/
-*.egg
-dist/
-build/
+# Docker
+Dockerfile*
+docker-compose*
CHANGELOG.md (21 lines changed)
@@ -2,12 +2,23 @@
 
 Notable changes to this project will be documented in this file.
 
-## [v0.0.6] - 2025-01-10
+## [v0.1.0] - 2025-01-13
+### Changed
+- Major Docker improvements:
+  - Baked model directly into Dockerfile for improved deployment reliability
+  - Switched to uv for dependency management
+  - Streamlined container builds and reduced image sizes
+- Dependency Management:
+  - Migrated from pip/poetry to uv for faster, more reliable package management
+  - Added uv.lock for deterministic builds
+  - Updated dependency resolution strategy
+
+## [v0.0.5post1] - 2025-01-11
 ### Fixed
-- Fixed dependency issues:
-  - Let PyTorch manage numpy version
-  - Pin aiofiles to 23.2.1 for Windows compatibility
-  - Added CI workflow for testing
+- Docker image tagging and versioning improvements (-gpu, -cpu, -ui)
+- Minor vram management improvements
+- Gradio bugfix causing crashes and errant warnings
+- Updated GPU and UI container configurations
 
 ## [v0.0.5] - 2025-01-10
 ### Fixed
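The dependency-management entries above center on a committed lockfile. A minimal sketch of the uv workflow they imply, assuming a standard pyproject.toml at the repository root (the exact commands used inside the project's Dockerfiles may differ):

```bash
# Resolve dependencies and write uv.lock, the committed lockfile for deterministic builds
uv lock

# Create a virtual environment and install exactly what uv.lock pins,
# without re-resolving or updating the lockfile
uv venv
uv sync --frozen
```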
README.md (52 lines changed)
@@ -5,7 +5,7 @@
 # Kokoro TTS API
 []()
 []()
-[](https://huggingface.co/hexgrad/Kokoro-82M/tree/c3b0d86e2a980e027ef71c28819ea02e351c2667) [](https://huggingface.co/spaces/Remsky/Kokoro-TTS-Zero)
+[](https://huggingface.co/hexgrad/Kokoro-82M/tree/c3b0d86e2a980e027ef71c28819ea02e351c2667) [](https://huggingface.co/spaces/Remsky/Kokoro-TTS-Zero) [](https://www.buymeacoffee.com/remsky)
 
 Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) text-to-speech model
 - OpenAI-compatible Speech endpoint, with inline voice combination functionality
@@ -29,7 +29,8 @@ The service can be accessed through either the API endpoints or the Gradio web interface.
 ```bash
 git clone https://github.com/remsky/Kokoro-FastAPI.git
 cd Kokoro-FastAPI
-docker compose up --build
+docker compose up --build # for GPU
+#docker compose -f docker-compose.cpu.yml up --build # for CPU
 ```
 2. Run locally as an OpenAI-Compatible Speech Endpoint
 ```python
@@ -317,6 +318,53 @@ with open("speech.wav", "wb") as f:
 See `examples/phoneme_examples/generate_phonemes.py` for a sample script.
 </details>
 
+## Known Issues
+
+<details>
+<summary>Linux GPU Permissions</summary>
+
+Some Linux users may encounter GPU permission issues when running as non-root.
+Can't guarantee anything, but here are some common solutions; consider your security requirements carefully.
+
+### Option 1: Container Groups (Likely the best option)
+```yaml
+services:
+  kokoro-tts:
+    # ... existing config ...
+    group_add:
+      - "video"
+      - "render"
+```
+
+### Option 2: Host System Groups
+```yaml
+services:
+  kokoro-tts:
+    # ... existing config ...
+    user: "${UID}:${GID}"
+    group_add:
+      - "video"
+```
+Note: May require adding the host user to groups (`sudo usermod -aG docker,video $USER`) and a system restart.
+
+### Option 3: Device Permissions (Use with caution)
+```yaml
+services:
+  kokoro-tts:
+    # ... existing config ...
+    devices:
+      - /dev/nvidia0:/dev/nvidia0
+      - /dev/nvidiactl:/dev/nvidiactl
+      - /dev/nvidia-uvm:/dev/nvidia-uvm
+```
+⚠️ Warning: Reduces system security. Use only in development environments.
+
+Prerequisites: NVIDIA GPU, drivers, and container toolkit must be properly configured.
+
+Visit [NVIDIA Container Toolkit installation](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) for more detailed information.
+
+</details>
+
 ## Model and License
 
 <details open>
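A side note on Option 2 in the Known Issues section above: docker compose only substitutes `${UID}` and `${GID}` when they are set in its environment, and bash treats `UID` as read-only, so exporting it directly can fail. One way to supply both values is a `.env` file next to the compose file; the file name and location here are assumptions, not something this commit adds:

```bash
# Write the current user's IDs where docker compose picks them up for ${UID}:${GID}
echo "UID=$(id -u)" >> .env
echo "GID=$(id -g)" >> .env
docker compose up -d
```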
@@ -38,48 +38,64 @@ from .text_processing import tokenize, phonemize
 # return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
 @torch.no_grad()
 def forward(model, tokens, ref_s, speed):
-    """Forward pass through the model with light optimizations that preserve output quality"""
+    """Forward pass through the model with moderate memory management"""
     device = ref_s.device
 
-    # Keep original token handling but optimize device placement
+    try:
+        # Initial tensor setup with proper device placement
         tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
         input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
         text_mask = length_to_mask(input_lengths).to(device)
 
+        # Split and clone reference signals with explicit device placement
+        s_content = ref_s[:, 128:].clone().to(device)
+        s_ref = ref_s[:, :128].clone().to(device)
+
         # BERT and encoder pass
         bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
         d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
 
-    # Split reference signal once for efficiency
-    s_content = ref_s[:, 128:]
-    s_ref = ref_s[:, :128]
-
         # Predictor forward pass
         d = model.predictor.text_encoder(d_en, s_content, input_lengths, text_mask)
         x, _ = model.predictor.lstm(d)
 
-    # Duration prediction - keeping original logic
+        # Duration prediction
         duration = model.predictor.duration_proj(x)
         duration = torch.sigmoid(duration).sum(axis=-1) / speed
         pred_dur = torch.round(duration).clamp(min=1).long()
+        # Only cleanup large intermediates
+        del duration, x
 
-    # Alignment matrix construction - keeping original approach for quality
-    pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item(), device=device)
+        # Alignment matrix construction
+        pred_aln_trg = torch.zeros(input_lengths.item(), pred_dur.sum().item(), device=device)
         c_frame = 0
         for i in range(pred_aln_trg.size(0)):
             pred_aln_trg[i, c_frame : c_frame + pred_dur[0, i].item()] = 1
             c_frame += pred_dur[0, i].item()
+        pred_aln_trg = pred_aln_trg.unsqueeze(0)
 
-    # Matrix multiplications - reuse unsqueezed tensor
-    pred_aln_trg = pred_aln_trg.unsqueeze(0)  # Do unsqueeze once
+        # Matrix multiplications with selective cleanup
         en = d.transpose(-1, -2) @ pred_aln_trg
-    F0_pred, N_pred = model.predictor.F0Ntrain(en, s_content)
-
-    # Text encoding and final decoding
+        del d  # Free large intermediate tensor
+
+        F0_pred, N_pred = model.predictor.F0Ntrain(en, s_content)
+        del en  # Free large intermediate tensor
+
+        # Final text encoding and decoding
         t_en = model.text_encoder(tokens, input_lengths, text_mask)
         asr = t_en @ pred_aln_trg
+        del t_en  # Free large intermediate tensor
 
-    return model.decoder(asr, F0_pred, N_pred, s_ref).squeeze().cpu().numpy()
+        # Final decoding and transfer to CPU
+        output = model.decoder(asr, F0_pred, N_pred, s_ref)
+        result = output.squeeze().cpu().numpy()
+
+        return result
+
+    finally:
+        # Let PyTorch handle most cleanup automatically
+        # Only explicitly free the largest tensors
+        del pred_aln_trg, asr
 
 # def length_to_mask(lengths):
@@ -179,7 +195,7 @@ class TTSGPUModel(TTSBaseModel):
     def generate_from_tokens(
         cls, tokens: list[int], voicepack: torch.Tensor, speed: float
     ) -> np.ndarray:
-        """Generate audio from tokens
+        """Generate audio from tokens with moderate memory management
 
         Args:
             tokens: Token IDs
@@ -192,10 +208,55 @@ class TTSGPUModel(TTSBaseModel):
         if cls._instance is None:
             raise RuntimeError("GPU model not initialized")
 
-        # Get reference style
-        ref_s = voicepack[len(tokens)]
+        try:
+            device = cls._device
+
+            # Check memory pressure
+            if torch.cuda.is_available():
+                memory_allocated = torch.cuda.memory_allocated(device) / 1e9  # Convert to GB
+                if memory_allocated > 2.0:  # 2GB limit
+                    logger.info(
+                        f"Memory usage above 2GB threshold:{memory_allocated:.2f}GB "
+                        f"Clearing cache"
+                    )
+                    torch.cuda.empty_cache()
+                    import gc
+                    gc.collect()
+
+            # Get reference style with proper device placement
+            ref_s = voicepack[len(tokens)].clone().to(device)
 
             # Generate audio
             audio = forward(cls._instance, tokens, ref_s, speed)
 
             return audio
 
+        except RuntimeError as e:
+            if "out of memory" in str(e):
+                # On OOM, do a full cleanup and retry
+                if torch.cuda.is_available():
+                    logger.warning("Out of memory detected, performing full cleanup")
+                    torch.cuda.synchronize()
+                    torch.cuda.empty_cache()
+                    import gc
+                    gc.collect()
+
+                    # Log memory stats after cleanup
+                    memory_allocated = torch.cuda.memory_allocated(device)
+                    memory_reserved = torch.cuda.memory_reserved(device)
+                    logger.info(
+                        f"Memory after OOM cleanup: "
+                        f"Allocated: {memory_allocated / 1e9:.2f}GB, "
+                        f"Reserved: {memory_reserved / 1e9:.2f}GB"
+                    )
+
+                # Retry generation
+                ref_s = voicepack[len(tokens)].clone().to(device)
+                audio = forward(cls._instance, tokens, ref_s, speed)
+                return audio
+            raise
+
+        finally:
+            # Only synchronize at the top level, no empty_cache
+            if torch.cuda.is_available():
+                torch.cuda.synchronize()

@@ -20,17 +20,17 @@ services:
       - ONNX_MEMORY_PATTERN=true
       - ONNX_ARENA_EXTEND_STRATEGY=kNextPowerOfTwo
 
-  # # Gradio UI service [Comment out everything below if you don't need it]
-  # gradio-ui:
-  #   # image: ghcr.io/remsky/kokoro-fastapi:latest-ui
-  #   # Uncomment below (and comment out above) to build from source instead of using the released image
-  #   # build:
-  #   #   context: ./ui
-  #   ports:
-  #     - "7860:7860"
-  #   volumes:
-  #     - ./ui/data:/app/ui/data
-  #     - ./ui/app.py:/app/app.py # Mount app.py for hot reload
-  #   environment:
-  #     - GRADIO_WATCH=True # Enable hot reloading
-  #     - PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
+  # Gradio UI service [Comment out everything below if you don't need it]
+  gradio-ui:
+    # image: ghcr.io/remsky/kokoro-fastapi:latest-ui
+    # Uncomment below (and comment out above) to build from source instead of using the released image
+    # build:
+    #   context: ./ui
+    ports:
+      - "7860:7860"
+    volumes:
+      - ./ui/data:/app/ui/data
+      - ./ui/app.py:/app/app.py # Mount app.py for hot reload
+    environment:
+      - GRADIO_WATCH=True # Enable hot reloading
+      - PYTHONUNBUFFERED=1 # Ensure Python output is not buffered

@@ -1,45 +1,10 @@
 services:
-  # model-fetcher:
-  #   image: datamachines/git-lfs:latest
-  #   environment:
-  #     - SKIP_MODEL_FETCH=${SKIP_MODEL_FETCH:-false}
-  #   volumes:
-  #     - ./Kokoro-82M:/app/Kokoro-82M
-  #   working_dir: /app/Kokoro-82M
-  #   command: >
-  #     sh -c "
-  #       if [ \"$$SKIP_MODEL_FETCH\" = \"true\" ]; then
-  #         echo 'Skipping model fetch...' && touch .cloned;
-  #       else
-  #         rm -f .git/index.lock;
-  #         if [ -z \"$(ls -A .)\" ]; then
-  #           git clone https://huggingface.co/hexgrad/Kokoro-82M .
-  #           touch .cloned;
-  #         else
-  #           rm -f .git/index.lock && \
-  #           git checkout main && \
-  #           git pull origin main && \
-  #           touch .cloned;
-  #         fi;
-  #       fi;
-  #       tail -f /dev/null
-  #     "
-  #   healthcheck:
-  #     test: ["CMD", "test", "-f", ".cloned"]
-  #     interval: 5s
-  #     timeout: 2s
-  #     retries: 300
-  #     start_period: 1s
-
   kokoro-tts:
-    # image: ghcr.io/remsky/kokoro-fastapi-gpu:latest
-    # Uncomment below to build from source instead of using the released image
     build:
       context: ../..
       dockerfile: docker/gpu/Dockerfile
     volumes:
-      - ../../api/src:/app/api/src
-      - ../../Kokoro-82M:/app/Kokoro-82M
+      - ../../api/src:/app/api/src # Mount src for development
     ports:
       - "8880:8880"
     environment:
@@ -51,21 +16,18 @@ services:
           - driver: nvidia
             count: 1
             capabilities: [gpu]
-    # depends_on:
-    #   model-fetcher:
-    #     condition: service_healthy
 
-  # Gradio UI service [Comment out everything below if you don't need it]
-  # gradio-ui:
+  # Gradio UI service
+  gradio-ui:
     # image: ghcr.io/remsky/kokoro-fastapi-ui:latest
     # Uncomment below to build from source instead of using the released image
-  # build:
-  #   context: ./ui
-  # ports:
-  #   - "7860:7860"
-  # volumes:
-  #   - ./ui/data:/app/ui/data
-  #   - ./ui/app.py:/app/app.py # Mount app.py for hot reload
-  # environment:
-  #   - GRADIO_WATCH=1 # Enable hot reloading
-  #   - PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
+    build:
+      context: ../../ui
+    ports:
+      - "7860:7860"
+    volumes:
+      - ../../ui/data:/app/ui/data
+      - ../../ui/app.py:/app/app.py # Mount app.py for hot reload
+    environment:
+      - GRADIO_WATCH=1 # Enable hot reloading
+      - PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
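Given the restructuring into docker/cpu and docker/gpu directories and the ../.. build context shown in the GPU compose file above, one plausible way to launch the stack from the new layout is to run compose from inside the matching directory; the exact compose file names under those directories are an assumption, not confirmed by this diff:

```bash
# GPU stack, built from the repo root via the ../.. context
cd docker/gpu
docker compose up --build

# CPU variant (assumed to mirror the layout under docker/cpu)
# cd docker/cpu && docker compose up --build
```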