From 3c8e1b9143cf4629f57d40e2760068fd40442b24 Mon Sep 17 00:00:00 2001
From: Richard Roberson <richardr1126@gmail.com>
Date: Sat, 22 Mar 2025 05:54:58 -0600
Subject: [PATCH] Add example folder values files for Azure AKS and Nvidia GPU
 Operator

---
 .../{ => examples}/aks-tls-values.yaml        |  2 +-
 .../examples/gpu-operator-values.yaml         | 56 +++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)
 rename charts/kokoro-fastapi/{ => examples}/aks-tls-values.yaml (98%)
 create mode 100644 charts/kokoro-fastapi/examples/gpu-operator-values.yaml

diff --git a/charts/kokoro-fastapi/aks-tls-values.yaml b/charts/kokoro-fastapi/examples/aks-tls-values.yaml
similarity index 98%
rename from charts/kokoro-fastapi/aks-tls-values.yaml
rename to charts/kokoro-fastapi/examples/aks-tls-values.yaml
index 2a6d13d..236af0a 100644
--- a/charts/kokoro-fastapi/aks-tls-values.yaml
+++ b/charts/kokoro-fastapi/examples/aks-tls-values.yaml
@@ -19,7 +19,7 @@
 #   --skip-gpu-driver-install
 
 kokoroTTS:
-  replicaCount: 2
+  replicaCount: 8
   port: 8880
   tag: v0.2.0
   pullPolicy: IfNotPresent
diff --git a/charts/kokoro-fastapi/examples/gpu-operator-values.yaml b/charts/kokoro-fastapi/examples/gpu-operator-values.yaml
new file mode 100644
index 0000000..b74667f
--- /dev/null
+++ b/charts/kokoro-fastapi/examples/gpu-operator-values.yaml
@@ -0,0 +1,56 @@
+# Follow the official NVIDIA GPU Operator documentation
+#   to install the GPU operator with these settings:
+#     https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html
+#
+# This example is for an NVIDIA T4 16 GB GPU node pool with only 1 GPU on each node on Azure AKS.
+# It uses time-slicing to share the GPU, advertising each physical GPU to the cluster as 4 GPUs.
+# So each pod gets roughly a quarter of the GPU (~4 GB of memory); time-slicing does not enforce this split.
+#
+devicePlugin: # Remove this whole block if you don't want to use time-slicing
+  config:
+    create: true
+    name: "time-slicing-config"
+    default: "any"  # must match a key under `data` (here: `any`)
+    data:  # each key holds one embedded config document; do not edit inside the block scalar casually
+      any: |-
+        version: v1
+        flags:
+          migStrategy: none
+        sharing:
+          timeSlicing:
+            resources:
+            - name: nvidia.com/gpu
+              replicas: 4
+
+daemonsets:  # tolerations for the operator's daemonsets so they can run on tainted nodes
+  tolerations:
+    - key: "sku"  # tolerates sku=gpu:NoSchedule (presumably the GPU node pool taint; confirm)
+      operator: Equal
+      value: "gpu"
+      effect: NoSchedule
+    - key: "kubernetes.azure.com/scalesetpriority"  # tolerates the =spot:NoSchedule taint (presumably AKS spot pools; confirm)
+      operator: Equal
+      value: "spot"
+      effect: NoSchedule
+
+node-feature-discovery:  # master and worker below carry the same two tolerations; keep them in sync
+  master:
+    tolerations:
+      - key: "sku"  # tolerates sku=gpu:NoSchedule (presumably the GPU node pool taint; confirm)
+        operator: Equal
+        value: "gpu"
+        effect: NoSchedule
+      - key: "kubernetes.azure.com/scalesetpriority"  # tolerates the =spot:NoSchedule taint (presumably AKS spot pools; confirm)
+        operator: Equal
+        value: "spot"
+        effect: NoSchedule
+  worker:
+    tolerations:
+      - key: "sku"  # same as master: tolerates sku=gpu:NoSchedule
+        operator: Equal
+        value: "gpu"
+        effect: NoSchedule
+      - key: "kubernetes.azure.com/scalesetpriority"  # same as master: tolerates the =spot:NoSchedule taint
+        operator: Equal
+        value: "spot"
+        effect: NoSchedule
\ No newline at end of file