Merge pull request #257 from richardr1126/update-helm-charts

Fix Helm charts health check, ingress, and values
2025-09-18 21:39:23 +00:00 · 2025-03-26 10:11:52 -04:00 · 2025-03-26 10:11:52 -04:00 · d0c13f6401
commit d0c13f6401
parent fe99bb7697 3c8e1b9143
8 changed files with 183 additions and 137 deletions
--- a/charts/kokoro-fastapi/Chart.yaml
+++ b/charts/kokoro-fastapi/Chart.yaml
@ -1,24 +1,12 @@
 apiVersion: v2
 name: kokoro-fastapi
-description: A Helm chart for kokoro-fastapi
-
-# A chart can be either an 'application' or a 'library' chart.
-#
-# Application charts are a collection of templates that can be packaged into versioned archives
-# to be deployed.
-#
-# Library charts provide useful utilities or functions for the chart developer. They're included as
-# a dependency of application charts to inject those utilities and functions into the rendering
-# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+description: A Helm chart for deploying the Kokoro FastAPI TTS service to Kubernetes
 type: application
+version: 0.2.0
+appVersion: "0.2.0"

-# This is the chart version. This version number should be incremented each time you make changes
-# to the chart and its templates, including the app version.
-# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
-
-# This is the version number of the application being deployed. This version number should be
-# incremented each time you make changes to the application. Versions are not expected to
-# follow Semantic Versioning. They should reflect the version the application is using.
-# It is recommended to use it with quotes.
-appVersion: "1.16.0"
+keywords:
+  - tts
+  - fastapi
+  - gpu
+  - kokoro
--- a/charts/kokoro-fastapi/examples/aks-tls-values.yaml
+++ b/charts/kokoro-fastapi/examples/aks-tls-values.yaml
@ -0,0 +1,54 @@
+# Tested on
+# - Azure AKS with GPU node pool with Nvidia GPU operator
+# - This setup uses 1 ingress and load balances across the kokoroTTS replicas (see replicaCount below), enabling simultaneous requests
+# 
+# Azure CLI command to create a GPU node pool:
+# az aks nodepool add \
+#   --resource-group $AZ_RESOURCE_GROUP \
+#   --cluster-name $CLUSTER_NAME \
+#   --name t4gpus \
+#   --node-vm-size Standard_NC4as_T4_v3 \
+#   --node-count 2 \
+#   --enable-cluster-autoscaler \
+#   --min-count 1 \
+#   --max-count 2 \
+#   --priority Spot \
+#   --eviction-policy Delete \
+#   --spot-max-price -1 \
+#   --node-taints "sku=gpu:NoSchedule,kubernetes.azure.com/scalesetpriority=spot:NoSchedule" \
+#   --skip-gpu-driver-install
+
+kokoroTTS:
+  replicaCount: 8
+  port: 8880
+  tag: v0.2.0
+  pullPolicy: IfNotPresent
+
+# Azure specific settings for spot t4 GPU nodes with Nvidia GPU operator
+tolerations:
+  - key: "kubernetes.azure.com/scalesetpriority"
+    operator: Equal
+    value: "spot"
+    effect: NoSchedule
+  - key: "sku"
+    operator: Equal
+    value: "gpu"
+    effect: NoSchedule
+
+ingress:
+  enabled: true
+  className: "nginx"
+  annotations:
+    # Requires cert-manager and external-dns to be in the cluster for TLS and DNS
+    cert-manager.io/cluster-issuer: letsencrypt-prod
+    external-dns.alpha.kubernetes.io/hostname: your-external-dns-enabled-hostname
+    external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
+  hosts:
+    - host: your-external-dns-enabled-hostname
+      paths:
+        - path: /
+          pathType: Prefix
+  tls:
+    - secretName: kokoro-fastapi-tls
+      hosts:
+        - your-external-dns-enabled-hostname
--- a/charts/kokoro-fastapi/examples/gpu-operator-values.yaml
+++ b/charts/kokoro-fastapi/examples/gpu-operator-values.yaml
@ -0,0 +1,56 @@
+# Follow the official NVIDIA GPU Operator documentation
+#   to install the GPU operator with these settings:
+#     https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html
+#
+# This example is for a Nvidia T4 16gb GPU node pool with only 1 GPU on each node on Azure AKS.
+# It uses time-slicing to share the GPU and advertise to the system that 1 GPU is 4 GPUs.
+# So each pod shares the physical GPU with up to 3 others (roughly 4gb of memory each; time-slicing does not enforce this split).
+#
+devicePlugin: # Remove this if you don't want to use time-slicing
+  config:
+    create: true
+    name: "time-slicing-config"
+    default: "any"
+    data:
+      any: |-
+        version: v1
+        flags:
+          migStrategy: none
+        sharing:
+          timeSlicing:
+            resources:
+            - name: nvidia.com/gpu
+              replicas: 4
+
+daemonsets:
+  tolerations:
+    - key: "sku"
+      operator: Equal
+      value: "gpu"
+      effect: NoSchedule
+    - key: "kubernetes.azure.com/scalesetpriority"
+      operator: Equal
+      value: "spot"
+      effect: NoSchedule
+
+node-feature-discovery:
+  master:
+    tolerations:
+      - key: "sku"
+        operator: Equal
+        value: "gpu"
+        effect: NoSchedule
+      - key: "kubernetes.azure.com/scalesetpriority"
+        operator: Equal
+        value: "spot"
+        effect: NoSchedule
+  worker:
+    tolerations:
+      - key: "sku"
+        operator: Equal
+        value: "gpu"
+        effect: NoSchedule
+      - key: "kubernetes.azure.com/scalesetpriority"
+        operator: Equal
+        value: "spot"
+        effect: NoSchedule
--- a/charts/kokoro-fastapi/templates/NOTES.txt
+++ b/charts/kokoro-fastapi/templates/NOTES.txt
@ -13,10 +13,10 @@
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
-  echo http://$SERVICE_IP:{{ .Values.service.port }}
+  echo http://$SERVICE_IP:{{ .Values.kokoroTTS.port }}
 {{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
-  echo "Visit http://127.0.0.1:8080 to use your application"
-  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+  echo "Visit http://127.0.0.1:8880 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8880:$CONTAINER_PORT
 {{- end }}
--- a/charts/kokoro-fastapi/templates/ingress.yaml
+++ b/charts/kokoro-fastapi/templates/ingress.yaml
@ -1,82 +1,43 @@
 {{- if .Values.ingress.enabled -}}
-{{- $fullName := include "kokoro-fastapi.fullname" . -}}
-{{- $svcPort := .Values.service.port -}}
-{{- $rewriteTargets := (list) -}}
-{{- with .Values.ingress.host }}
-  {{- range .endpoints }}
-    {{- $serviceName := default $fullName .serviceName -}}
-    {{- $rewrite := .rewrite | default "none" -}}
-    {{- if not (has $rewrite $rewriteTargets  ) -}}
-    {{- $rewriteTargets = append $rewriteTargets $rewrite -}}
-    {{- end -}}
-  {{- end}}
-{{- end }}
-{{- range $key := $rewriteTargets }}
-{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}}
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
-{{- if eq $key "none" }}
-  name: {{ $fullName }}
-{{- else }}
-  name: {{ $fullName }}-{{ $expandedRewrite }}
-{{- end }}
+  name: {{ include "kokoro-fastapi.fullname" . }}
  labels:
-    {{- include "kokoro-fastapi.labels" $ | nindent 4 }}
-    {{- if ne $key "none" }}
+    {{- include "kokoro-fastapi.labels" . | nindent 4 }}
+  {{- with .Values.ingress.annotations }}
  annotations:
-    nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2
-    {{- end }}
-spec:
-{{- if $.Values.ingress.tls }}
-  tls:
-  {{- range $.Values.ingress.tls }}
-    - hosts:
-      {{- range .hosts }}
-        - {{ . | quote }}
-      {{- end }}
-      secretName: {{ .secretName }}
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- if .Values.ingress.tls }}
+  tls:
+    {{- range .Values.ingress.tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
  {{- end }}
-{{- end }}
  rules:
-  {{- with $.Values.ingress.host }}
-    - host: {{ .name | quote }}
+    {{- range .Values.ingress.hosts }}
+    - host: {{ .host | quote }}
      http:
        paths:
-        {{- range .endpoints }}
-          {{- $serviceName := default $fullName .serviceName -}}
-          {{- $servicePort := default (print "http") .servicePort -}}
-          {{- if eq ( .rewrite | default "none" ) $key }}
-            {{- range .paths }}
-              {{- if not (contains "@" .) }}
-                {{- if eq $key "none" }}
-            - path: {{ . }}
-                {{- else }}
-            - path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*)
-                {{- end }}
-              pathType: Prefix
-              backend:
-                service:
-                  name: "{{ $fullName }}-{{ $serviceName }}"
-                  port:
-                    number: {{ $servicePort }}
-              {{- else }}
-                {{- $path := . -}}
-                {{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}}
-                {{- range $count, $e := until ($replicaCount|int) }}
-            - path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*)
-              pathType: Prefix
-              backend:
-                service:
-                  name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}"
-                  port:
-                    number: {{ $servicePort }}
-                {{- end }}
-              {{- end }}
+          {{- range .paths }}
+          - path: {{ .path }}
+            {{- with .pathType }}
+            pathType: {{ . }}
            {{- end }}
+            backend:
+              service:
+                name: {{ include "kokoro-fastapi.fullname" $ }}-kokoro-tts-service
+                port:
+                  number: {{ $.Values.kokoroTTS.port }}
          {{- end }}
-        {{- end }}
-  {{- end }}
---
-{{- end }}
+    {{- end }}
 {{- end }}
--- a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
+++ b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
@ -20,7 +20,7 @@ spec:
      labels:
        {{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }}
    spec:
-      {{- with .Values.images.imagePullSecrets }}
+      {{- with .Values.kokoroTTS.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
@ -49,10 +49,16 @@ spec:
            httpGet:
              path: /health
              port: kokoro-tts-http
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: kokoro-tts-http
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 5
          resources:
            {{- toYaml .Values.kokoroTTS.resources | nindent 12 }}
          volumeMounts: []
--- a/charts/kokoro-fastapi/templates/tests/test-connection.yaml
+++ b/charts/kokoro-fastapi/templates/tests/test-connection.yaml
@ -11,5 +11,5 @@ spec:
    - name: wget
      image: busybox
      command: ['wget']
-      args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}']
+      args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.kokoroTTS.port }}']
  restartPolicy: Never
--- a/charts/kokoro-fastapi/values.yaml
+++ b/charts/kokoro-fastapi/values.yaml
@ -1,12 +1,19 @@
 # Default values for kokoro-fastapi.
 # This is a YAML-formatted file.
 # Declare variables to be passed into your templates.
-
-replicaCount: 1
-
-images:
-  pullPolicy: "Always"
-  imagePullSecrets: [ ]
+kokoroTTS:
+  replicaCount: 1
+  # Container image repository (registry/owner/name, without the tag)
+  repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
+  imagePullSecrets: [] # Set if using a private image or getting rate limited
+  tag: "latest"
+  pullPolicy: Always
+  port: 8880
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+    requests:
+      nvidia.com/gpu: 1

 nameOverride: ""
 fullnameOverride: ""
@ -38,47 +45,21 @@ service:

 ingress:
  enabled: false
-  className: ""
+  className: "nginx"
  annotations: {}
-    # kubernetes.io/ingress.class: nginx
-    # kubernetes.io/tls-acme: "true"
-  host:
-    name: kokoro.example.com
-    endpoints:
-      - paths:
-          - "/"
-        serviceName: "fastapi"
-        servicePort: 8880
+    # cert-manager.io/cluster-issuer: letsencrypt-prod
+    # external-dns.alpha.kubernetes.io/hostname: kokoro.example.com
+    # external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
+  hosts:
+    - host: kokoro.example.com
+      paths:
+        - path: /
+          pathType: Prefix

  tls: []
-  #  - secretName: chart-example-tls
+  #  - secretName: kokoro-fastapi-tls
  #    hosts:
-  #      - chart-example.local
-
-kokoroTTS:
-  repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
-  tag: "latest"
-  pullPolicy: Always
-  serviceName: "fastapi"
-  port: 8880
-  replicaCount: 1
-  resources:
-    limits:
-      nvidia.com/gpu: 1
-    requests:
-      nvidia.com/gpu: 1
-
-
-  # We usually recommend not to specify default resources and to leave this as a conscious
-  # choice for the user. This also increases chances charts run on environments with little
-  # resources, such as Minikube. If you do want to specify resources, uncomment the following
-  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
-  # limits:
-  #   cpu: 100m
-  #   memory: 128Mi
-  # requests:
-  #   cpu: 100m
-  #   memory: 128Mi
+  #      - kokoro.example.com

 autoscaling:
  enabled: false