From 6d717437c9ac22fa9f376a882d9b004be23da2d0 Mon Sep 17 00:00:00 2001
From: rvuyyuru2
Date: Sat, 25 Jan 2025 21:55:53 +0530
Subject: [PATCH] Renames service and expands Docker configuration

Updates the Compose project name to 'InstaVoice' and replaces the single
kokoro-tts service with multiple server services to improve scalability.
Changes the GPU resource reservation to use all available devices, removes
the commented-out Gradio UI service, and adds an NGINX service that acts as
a reverse proxy and load balancer in front of the API servers. Also adds a
setup.sh script that downloads the PyTorch model.
---
 docker/gpu/docker-compose.yml | 77 ++++++++++++++++++++++++----------
 docker/gpu/nginx.conf         | 78 +++++++++++++++++++++++++++++++++++
 setup.sh                      | 31 ++++++++++++++
 3 files changed, 165 insertions(+), 21 deletions(-)
 create mode 100644 docker/gpu/nginx.conf
 create mode 100755 setup.sh

diff --git a/docker/gpu/docker-compose.yml b/docker/gpu/docker-compose.yml
index f27e15b..c711925 100644
--- a/docker/gpu/docker-compose.yml
+++ b/docker/gpu/docker-compose.yml
@@ -1,7 +1,6 @@
-name: kokoro-tts
+name: InstaVoice
 services:
-  kokoro-tts:
-    # image: ghcr.io/remsky/kokoro-fastapi-gpu:v0.1.0
+  server1:
     build:
       context: ../..
       dockerfile: docker/gpu/Dockerfile
@@ -19,23 +18,59 @@ services:
         reservations:
           devices:
             - driver: nvidia
-              count: 1
+              count: all
               capabilities: [gpu]
-  # # Gradio UI service
-  # gradio-ui:
-  #   image: ghcr.io/remsky/kokoro-fastapi-ui:v0.1.0
-  #   # Uncomment below to build from source instead of using the released image
-  #   # build:
-  #   #   context: ../../ui
-  #   ports:
-  #     - "7860:7860"
-  #   volumes:
-  #     - ../../ui/data:/app/ui/data
-  #     - ../../ui/app.py:/app/app.py # Mount app.py for hot reload
-  #   environment:
-  #     - GRADIO_WATCH=1 # Enable hot reloading
-  #     - PYTHONUNBUFFERED=1 # Ensure Python output is not buffered
-  #     - DISABLE_LOCAL_SAVING=false # Set to 'true' to disable local saving and hide file view
-  #     - API_HOST=kokoro-tts # Set TTS service URL
-  #     - API_PORT=8880 # Set TTS service PORT
+
+  server2:
+    build:
+      context: ../..
+      dockerfile: docker/gpu/Dockerfile
+    volumes:
+      - ../../api:/app/api
+    ports:
+      - "8881:8880" # Distinct host port so all API servers can run side by side
+    environment:
+      - PYTHONPATH=/app:/app/api
+      - USE_GPU=true
+      - USE_ONNX=false
+      - PYTHONUNBUFFERED=1
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+  server3:
+    build:
+      context: ../..
+      dockerfile: docker/gpu/Dockerfile
+    volumes:
+      - ../../api:/app/api
+    ports:
+      - "8882:8880" # Distinct host port so all API servers can run side by side
+    environment:
+      - PYTHONPATH=/app:/app/api
+      - USE_GPU=true
+      - USE_ONNX=false
+      - PYTHONUNBUFFERED=1
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+  nginx:
+    image: nginx:alpine
+    ports:
+      - "80:80" # Expose port 80 on the host machine
+    volumes:
+      - ./nginx.conf:/etc/nginx/nginx.conf # Load custom NGINX configuration
+    depends_on:
+      - server1
+      - server2
+      - server3
diff --git a/docker/gpu/nginx.conf b/docker/gpu/nginx.conf
new file mode 100644
index 0000000..a11ab84
--- /dev/null
+++ b/docker/gpu/nginx.conf
@@ -0,0 +1,78 @@
+user nginx;
+worker_processes auto; # Automatically adjust worker processes based on available CPUs
+
+events {
+    worker_connections 1024; # Maximum simultaneous connections per worker
+    use epoll; # Use efficient event handling on Linux
+}
+
+http {
+    # Basic security headers
+    add_header X-Frame-Options SAMEORIGIN always; # Prevent clickjacking
+    add_header X-Content-Type-Options nosniff always; # Prevent MIME-type sniffing
+    add_header X-XSS-Protection "1; mode=block" always; # Enable XSS protection in browsers
+    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; # Enforce HTTPS
+    add_header Content-Security-Policy "default-src 'self';" always; # Restrict resource loading to the same origin
+
+    # Performance tuning and timeouts
+    sendfile on; # Enable sendfile for efficient file serving
+    tcp_nopush on; # Reduce packet overhead
+    tcp_nodelay on; # Minimize latency
+    keepalive_timeout 65; # Keep connections alive for 65 seconds
+    client_max_body_size 10m; # Limit request body size to 10MB
+    client_body_timeout 12; # Timeout for reading the client body
+    client_header_timeout 12; # Timeout for reading the client headers
+
+    # Compression
+    gzip on; # Enable gzip compression
+    gzip_disable "msie6"; # Disable gzip for old browsers
+    gzip_vary on; # Add "Vary: Accept-Encoding" header
+    gzip_proxied any; # Enable gzip for proxied requests
+    gzip_comp_level 6; # Compression level
+    gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
+
+    # Load balancing upstream
+    upstream backend {
+        least_conn; # Use the least-connections load balancing strategy
+        # max_fails/fail_timeout act as passive health checks for the backend servers
+        server server1:8880 max_fails=3 fail_timeout=5s;
+        server server2:8880 max_fails=3 fail_timeout=5s;
+        server server3:8880 max_fails=3 fail_timeout=5s;
+    }
+
+    server {
+        listen 80;
+
+        # Redirect HTTP to HTTPS (optional)
+        # Uncomment the lines below if SSL is configured:
+        # listen 443 ssl;
+        # ssl_certificate /path/to/certificate.crt;
+        # ssl_certificate_key /path/to/private.key;
+
+        location / {
+            proxy_pass http://backend; # Proxy traffic to the backend servers
+            proxy_http_version 1.1; # Use HTTP/1.1 for persistent connections
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection "upgrade";
+            proxy_set_header Host $host;
+            proxy_set_header X-Forwarded-For $remote_addr; # Forward the client IP
+            proxy_cache_bypass $http_upgrade;
+            proxy_read_timeout 60s; # Read timeout for the backend
+            proxy_connect_timeout 60s; # Connection timeout for the backend
+            proxy_send_timeout 60s; # Send timeout for the backend
+        }
+
+        # Custom error pages
+        error_page 502 503 504 /50x.html;
+        location = /50x.html {
+            root /usr/share/nginx/html;
+        }
+
+        # Deny access to hidden files (e.g., .git)
+        location ~ /\. {
+            deny all;
+            access_log off;
+            log_not_found off;
+        }
+    }
+}
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..c8bda83
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+set -euo pipefail # Abort on errors, unset variables, and failed pipelines
+
+# Ensure the models directory exists
+mkdir -p api/src/models
+
+# Download a file into the models directory
+download_file() {
+    local url="$1"
+    local filename
+    filename=$(basename "$url")
+    echo "Downloading $filename..."
+    curl -L --fail "$url" -o "api/src/models/$filename"
+}
+
+# Default PTH model if no arguments are provided
+DEFAULT_MODELS=(
+    "https://github.com/remsky/Kokoro-FastAPI/releases/download/v0.1.0/kokoro-v0_19.pth"
+)
+
+# Use the provided models or fall back to the default
+if [ $# -gt 0 ]; then
+    MODELS=("$@")
+else
+    MODELS=("${DEFAULT_MODELS[@]}")
+fi
+
+# Download all models
+for model in "${MODELS[@]}"; do
+    download_file "$model"
+done
+
+echo "PyTorch model download complete!"
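
A quick way to exercise the load-balanced stack after docker compose up is a
small smoke-test script along the lines of the sketch below. It is a minimal
sketch, assuming the API containers expose an OpenAI-compatible
/v1/audio/speech endpoint on port 8880 behind the NGINX proxy on host port 80;
the endpoint path, payload fields, and voice name are assumptions and should
be adjusted to match the actual API.

#!/bin/bash
# smoke_test.sh - send a few requests through the NGINX proxy to confirm the
# upstream API servers respond. The endpoint path, payload fields, and voice
# name are assumptions; adjust them to match the deployed API.
set -euo pipefail

PROXY_URL="http://localhost:80"

for i in 1 2 3; do
    echo "Sending request $i through the proxy..."
    curl -sSf -X POST "$PROXY_URL/v1/audio/speech" \
        -H "Content-Type: application/json" \
        -d "{\"model\": \"kokoro\", \"input\": \"Load balancer test $i\", \"voice\": \"af\"}" \
        -o "smoke_test_$i.mp3"
done

echo "All requests completed."

With least_conn balancing, consecutive requests should be spread across
server1, server2, and server3, which can be confirmed in the output of
docker compose logs.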