Renames Compose project and expands Docker configuration for load balancing

Renames the Compose project to 'InstaVoice' and replaces the single kokoro-tts service with three identical backend services (server1-server3) to enable horizontal scaling.
Changes GPU resource allocation from a single device to all available devices and adds an NGINX service that acts as a reverse proxy, load-balancing across the backends.

Removes the commented-out Gradio UI service, so the file now manages only the API backends and the proxy.
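
As a quick orientation, here is a minimal sketch of bringing the stack up and routing a request through the proxy. It assumes the compose file shown below lives at docker/gpu/docker-compose.yml:

# Build and start the three GPU backends plus the NGINX proxy
docker compose -f docker/gpu/docker-compose.yml up --build -d

# Client traffic should now flow through NGINX on port 80 rather than
# hitting a backend's 8880 port directly; print the HTTP status as a smoke test
curl -s -o /dev/null -w "%{http_code}\n" http://localhost/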
rvuyyuru2 2025-01-25 21:55:53 +05:30
parent 44c62467ae
commit 6d717437c9
3 changed files with 165 additions and 21 deletions

docker/gpu/docker-compose.yml

@@ -1,7 +1,6 @@
-name: kokoro-tts
+name: InstaVoice
 services:
-  kokoro-tts:
-    # image: ghcr.io/remsky/kokoro-fastapi-gpu:v0.1.0
+  server1:
     build:
       context: ../..
       dockerfile: docker/gpu/Dockerfile
@@ -19,23 +18,59 @@ services:
       reservations:
         devices:
           - driver: nvidia
-            count: 1
+            count: all
             capabilities: [gpu]
-  # # Gradio UI service
-  # gradio-ui:
-  #   image: ghcr.io/remsky/kokoro-fastapi-ui:v0.1.0
-  #   # Uncomment below to build from source instead of using the released image
-  #   # build:
-  #   #   context: ../../ui
-  #   ports:
-  #     - "7860:7860"
-  #   volumes:
-  #     - ../../ui/data:/app/ui/data
-  #     - ../../ui/app.py:/app/app.py  # Mount app.py for hot reload
-  #   environment:
-  #     - GRADIO_WATCH=1  # Enable hot reloading
-  #     - PYTHONUNBUFFERED=1  # Ensure Python output is not buffered
-  #     - DISABLE_LOCAL_SAVING=false  # Set to 'true' to disable local saving and hide file view
-  #     - API_HOST=kokoro-tts  # Set TTS service URL
-  #     - API_PORT=8880  # Set TTS service PORT
+  server2:
+    build:
+      context: ../..
+      dockerfile: docker/gpu/Dockerfile
+    volumes:
+      - ../../api:/app/api
+    ports:
+      - "8881:8880"  # Distinct host port; "8880:8880" would clash with server1
+    environment:
+      - PYTHONPATH=/app:/app/api
+      - USE_GPU=true
+      - USE_ONNX=false
+      - PYTHONUNBUFFERED=1
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+  server3:
+    build:
+      context: ../..
+      dockerfile: docker/gpu/Dockerfile
+    volumes:
+      - ../../api:/app/api
+    ports:
+      - "8882:8880"  # Distinct host port for the same reason
+    environment:
+      - PYTHONPATH=/app:/app/api
+      - USE_GPU=true
+      - USE_ONNX=false
+      - PYTHONUNBUFFERED=1
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+  nginx:
+    image: nginx:alpine
+    ports:
+      - "80:80"  # Expose the proxy on host port 80
+    volumes:
+      - ./nginx.conf:/etc/nginx/nginx.conf  # Load custom NGINX configuration
+    depends_on:
+      - server1
+      - server2
+      - server3
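
Worth noting: NGINX reaches server1-server3 by service name on the Compose network, so the host port mappings above are only needed for direct debugging. A quick sanity check against each backend, assuming the distinct host ports shown above:

# Hit each backend directly, bypassing the proxy
for port in 8880 8881 8882; do
  curl -s -o /dev/null -w "backend on :$port -> %{http_code}\n" "http://localhost:$port/"
done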

docker/gpu/nginx.conf Normal file

@@ -0,0 +1,78 @@
user nginx;
worker_processes auto;  # Scale worker processes to available CPUs

events {
    worker_connections 1024;  # Maximum simultaneous connections per worker
    use epoll;                # Efficient event handling on Linux
}

http {
    # Basic security headers
    add_header X-Frame-Options SAMEORIGIN always;        # Prevent clickjacking
    add_header X-Content-Type-Options nosniff always;    # Prevent MIME-type sniffing
    add_header X-XSS-Protection "1; mode=block" always;  # Enable XSS filtering in older browsers
    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;  # Enforce HTTPS (only meaningful once TLS is enabled)
    add_header Content-Security-Policy "default-src 'self';" always;  # Restrict resource loading to same origin

    # Connection handling and timeouts
    sendfile on;               # Efficient file serving
    tcp_nopush on;             # Reduce packet overhead
    tcp_nodelay on;            # Minimize latency
    keepalive_timeout 65;      # Keep connections alive for 65 seconds
    client_max_body_size 10m;  # Limit request body size to 10 MB
    client_body_timeout 12;    # Timeout for reading the client body
    client_header_timeout 12;  # Timeout for reading client headers

    # Compression
    gzip on;               # Enable gzip compression
    gzip_disable "msie6";  # Disable gzip for old browsers
    gzip_vary on;          # Add "Vary: Accept-Encoding" header
    gzip_proxied any;      # Compress proxied requests too
    gzip_comp_level 6;     # Compression level
    gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;

    # Load-balancing upstream
    upstream backend {
        least_conn;  # Route each request to the backend with the fewest active connections
        # max_fails/fail_timeout provide passive health checking
        server server1:8880 max_fails=3 fail_timeout=5s;
        server server2:8880 max_fails=3 fail_timeout=5s;
        server server3:8880 max_fails=3 fail_timeout=5s;
    }

    server {
        listen 80;
        # Optional TLS: uncomment and point at your certificate to serve HTTPS
        # listen 443 ssl;
        # ssl_certificate /path/to/certificate.crt;
        # ssl_certificate_key /path/to/private.key;

        location / {
            proxy_pass http://backend;  # Proxy traffic to the upstream group
            proxy_http_version 1.1;     # HTTP/1.1 for persistent connections
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
            proxy_set_header Host $host;
            proxy_set_header X-Forwarded-For $remote_addr;  # Forward the client IP
            proxy_cache_bypass $http_upgrade;
            proxy_read_timeout 60s;     # Read timeout for backend responses
            proxy_connect_timeout 60s;  # Connection timeout to a backend
            proxy_send_timeout 60s;     # Send timeout to a backend
        }

        # Custom error pages
        error_page 502 503 504 /50x.html;
        location = /50x.html {
            root /usr/share/nginx/html;
        }

        # Deny access to hidden files (e.g., .git)
        location ~ /\. {
            deny all;
            access_log off;
            log_not_found off;
        }
    }
}
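
After editing nginx.conf, the configuration can be checked and reloaded in place without restarting the container (standard nginx flags; the service name nginx matches the compose file above):

# Check the configuration for syntax errors inside the running container
docker compose exec nginx nginx -t

# Apply changes without downtime
docker compose exec nginx nginx -s reload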

setup.sh Executable file

@@ -0,0 +1,31 @@
#!/bin/bash
set -euo pipefail  # Abort on errors, unset variables, and pipeline failures

# Ensure the models directory exists
mkdir -p api/src/models

# Download a single file into the models directory
download_file() {
    local url="$1"
    local filename
    filename=$(basename "$url")
    echo "Downloading $filename..."
    curl -fL "$url" -o "api/src/models/$filename"  # -f: fail on HTTP errors
}

# Default PTH model if no arguments are provided
DEFAULT_MODELS=(
    "https://github.com/remsky/Kokoro-FastAPI/releases/download/v0.1.0/kokoro-v0_19.pth"
)

# Use the provided models or fall back to the default
if [ $# -gt 0 ]; then
    MODELS=("$@")
else
    MODELS=("${DEFAULT_MODELS[@]}")
fi

# Download all models
for model in "${MODELS[@]}"; do
    download_file "$model"
done

echo "PyTorch model download complete!"