diff --git a/docker/gpu/docker-compose.yml b/docker/gpu/docker-compose.yml
index f27e15b..c711925 100644
--- a/docker/gpu/docker-compose.yml
+++ b/docker/gpu/docker-compose.yml
@@ -1,7 +1,6 @@
-name: kokoro-tts
+name: instavoice
 services:
-  kokoro-tts:
-    # image: ghcr.io/remsky/kokoro-fastapi-gpu:v0.1.0
+  server1:
     build:
       context: ../..
       dockerfile: docker/gpu/Dockerfile
@@ -19,23 +18,59 @@
         reservations:
           devices:
             - driver: nvidia
-              count: 1
+              count: all
               capabilities: [gpu]
-
-  # # Gradio UI service
-  # gradio-ui:
-  #   image: ghcr.io/remsky/kokoro-fastapi-ui:v0.1.0
-  #   # Uncomment below to build from source instead of using the released image
-  #   # build:
-  #   #   context: ../../ui
-  #   ports:
-  #     - "7860:7860"
-  #   volumes:
-  #     - ../../ui/data:/app/ui/data
-  #     - ../../ui/app.py:/app/app.py # Mount app.py for hot reload
-  #   environment:
-  #     - GRADIO_WATCH=1 # Enable hot reloading
-  #     - PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
-  #     - DISABLE_LOCAL_SAVING=false # Set to 'true' to disable local saving and hide file view
-  #     - API_HOST=kokoro-tts # Set TTS service URL
-  #     - API_PORT=8880 # Set TTS service PORT
+
+  server2:
+    build:
+      context: ../..
+      dockerfile: docker/gpu/Dockerfile
+    volumes:
+      - ../../api:/app/api
+    ports:
+      - "8881:8880"  # unique host port per replica; 8880 is taken by server1
+    environment:
+      - PYTHONPATH=/app:/app/api
+      - USE_GPU=true
+      - USE_ONNX=false
+      - PYTHONUNBUFFERED=1
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+  server3:
+    build:
+      context: ../..
+      dockerfile: docker/gpu/Dockerfile
+    volumes:
+      - ../../api:/app/api
+    ports:
+      - "8882:8880"  # unique host port per replica; 8880/8881 are taken
+    environment:
+      - PYTHONPATH=/app:/app/api
+      - USE_GPU=true
+      - USE_ONNX=false
+      - PYTHONUNBUFFERED=1
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+  # Reverse proxy / load balancer in front of the three API replicas.
+  nginx:
+    image: nginx:alpine
+    ports:
+      - "80:80" # Expose port 80 on the host machine
+    volumes:
+      - ./nginx.conf:/etc/nginx/nginx.conf # Load custom NGINX configuration
+    depends_on:
+      - server1
+      - server2
+      - server3
diff --git a/docker/gpu/nginx.conf b/docker/gpu/nginx.conf
new file mode 100644
index 0000000..a11ab84
--- /dev/null
+++ b/docker/gpu/nginx.conf
@@ -0,0 +1,78 @@
+user nginx;
+worker_processes auto; # Automatically adjust worker processes based on available CPUs
+
+events {
+    worker_connections 1024; # Maximum simultaneous connections per worker
+    use epoll; # Use efficient event handling for Linux
+}
+
+http {
+    # Basic security headers
+    add_header X-Frame-Options SAMEORIGIN always; # Prevent clickjacking
+    add_header X-Content-Type-Options nosniff always; # Prevent MIME-type sniffing
+    add_header X-XSS-Protection "1; mode=block" always; # Enable XSS protection in browsers
+    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; # Enforce HTTPS
+    add_header Content-Security-Policy "default-src 'self';" always; # Restrict resource loading to same origin
+
+    # Timeouts
+    sendfile on; # Enable sendfile for efficient file serving
+    tcp_nopush on; # Reduce packet overhead
+    tcp_nodelay on; # Minimize latency
+    keepalive_timeout 65; # Keep connections alive for 65 seconds
+    client_max_body_size 10m; # Limit request body size to 10MB
+    client_body_timeout 12; # Timeout for client body read
+    client_header_timeout 12; # Timeout for client header read
+
+    # Compression
+    gzip on; # Enable gzip compression
+    gzip_disable "msie6"; # Disable gzip for old browsers
+    gzip_vary on; # Add "Vary: Accept-Encoding" header
+    gzip_proxied any; # Enable gzip for proxied requests
+    gzip_comp_level 6; # Compression level
+    gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
+
+    # Load balancing upstream
+    upstream backend {
+        least_conn; # Use least connections load balancing strategy
+        server server1:8880 max_fails=3 fail_timeout=5s; # Add health check for backend servers
+        # All three API replicas participate in load balancing:
+        server server2:8880 max_fails=3 fail_timeout=5s;
+        server server3:8880 max_fails=3 fail_timeout=5s;
+    }
+
+    server {
+        listen 80;
+
+        # Redirect HTTP to HTTPS (optional)
+        # Uncomment the lines below if SSL is configured:
+        # listen 443 ssl;
+        # ssl_certificate /path/to/certificate.crt;
+        # ssl_certificate_key /path/to/private.key;
+
+        location / {
+            proxy_pass http://backend; # Proxy traffic to the backend servers
+            proxy_http_version 1.1; # Use HTTP/1.1 for persistent connections
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection "upgrade";
+            proxy_set_header Host $host;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; # Append client IP to the forwarding chain
+            proxy_cache_bypass $http_upgrade;
+            proxy_read_timeout 60s; # Adjust read timeout for backend
+            proxy_connect_timeout 60s; # Adjust connection timeout for backend
+            proxy_send_timeout 60s; # Adjust send timeout for backend
+        }
+
+        # Custom error pages
+        error_page 502 503 504 /50x.html;
+        location = /50x.html {
+            root /usr/share/nginx/html;
+        }
+
+        # Deny access to hidden files (e.g., .git)
+        location ~ /\. {
+            deny all;
+            access_log off;
+            log_not_found off;
+        }
+    }
+}
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..c8bda83
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# Ensure models directory exists
+mkdir -p api/src/models
+
+# Function to download a file
+download_file() {
+    local url="$1"
+    local filename; filename=$(basename "$url")
+    echo "Downloading $filename..."
+    curl -fL "$url" -o "api/src/models/$filename"
+}
+
+# Default PTH model if no arguments provided
+DEFAULT_MODELS=(
+    "https://github.com/remsky/Kokoro-FastAPI/releases/download/v0.1.0/kokoro-v0_19.pth"
+)
+
+# Use provided models or default
+if [ $# -gt 0 ]; then
+    MODELS=("$@")
+else
+    MODELS=("${DEFAULT_MODELS[@]}")
+fi
+
+# Download all models
+for model in "${MODELS[@]}"; do
+    download_file "$model"
+done
+
+echo "PyTorch model download complete!"