From 3c8e1b9143cf4629f57d40e2760068fd40442b24 Mon Sep 17 00:00:00 2001
From: Richard Roberson
Date: Sat, 22 Mar 2025 05:54:58 -0600
Subject: [PATCH] Add example folder values files for Azure AKS and Nvidia GPU Operator

Move aks-tls-values.yaml into examples/ and add gpu-operator-values.yaml.
Also change replicaCount in the AKS TLS example from 2 to 8.

---
 .../{ => examples}/aks-tls-values.yaml        |  2 +-
 .../examples/gpu-operator-values.yaml         | 56 +++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)
 rename charts/kokoro-fastapi/{ => examples}/aks-tls-values.yaml (98%)
 create mode 100644 charts/kokoro-fastapi/examples/gpu-operator-values.yaml

diff --git a/charts/kokoro-fastapi/aks-tls-values.yaml b/charts/kokoro-fastapi/examples/aks-tls-values.yaml
similarity index 98%
rename from charts/kokoro-fastapi/aks-tls-values.yaml
rename to charts/kokoro-fastapi/examples/aks-tls-values.yaml
index 2a6d13d..236af0a 100644
--- a/charts/kokoro-fastapi/aks-tls-values.yaml
+++ b/charts/kokoro-fastapi/examples/aks-tls-values.yaml
@@ -19,7 +19,7 @@
 #   --skip-gpu-driver-install

 kokoroTTS:
-  replicaCount: 2
+  replicaCount: 8
   port: 8880
   tag: v0.2.0
   pullPolicy: IfNotPresent
diff --git a/charts/kokoro-fastapi/examples/gpu-operator-values.yaml b/charts/kokoro-fastapi/examples/gpu-operator-values.yaml
new file mode 100644
index 0000000..b74667f
--- /dev/null
+++ b/charts/kokoro-fastapi/examples/gpu-operator-values.yaml
@@ -0,0 +1,56 @@
+# Follow the official NVIDIA GPU Operator documentation
+# to install the GPU operator with these settings:
+# https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html
+#
+# This example is for an NVIDIA T4 16 GB GPU node pool with only 1 GPU on each node on Azure AKS.
+# It uses time-slicing to share the GPU and claim to the system that 1 GPU is 4 GPUs.
+# So each pod gets roughly a 4 GB share of the GPU (time-slicing does not enforce memory limits).
+#
+devicePlugin: # Remove this if you don't want to use time-slicing
+  config:
+    create: true
+    name: "time-slicing-config"
+    default: "any"
+    data:
+      any: |-
+        version: v1
+        flags:
+          migStrategy: none
+        sharing:
+          timeSlicing:
+            resources:
+              - name: nvidia.com/gpu
+                replicas: 4
+
+daemonsets: # Tolerate the sku=gpu and spot-priority NoSchedule taints
+  tolerations:
+    - key: "sku"
+      operator: Equal
+      value: "gpu"
+      effect: NoSchedule
+    - key: "kubernetes.azure.com/scalesetpriority"
+      operator: Equal
+      value: "spot"
+      effect: NoSchedule
+
+node-feature-discovery: # Same tolerations for the NFD master and worker pods
+  master:
+    tolerations:
+      - key: "sku"
+        operator: Equal
+        value: "gpu"
+        effect: NoSchedule
+      - key: "kubernetes.azure.com/scalesetpriority"
+        operator: Equal
+        value: "spot"
+        effect: NoSchedule
+  worker:
+    tolerations:
+      - key: "sku"
+        operator: Equal
+        value: "gpu"
+        effect: NoSchedule
+      - key: "kubernetes.azure.com/scalesetpriority"
+        operator: Equal
+        value: "spot"
+        effect: NoSchedule