Merge pull request #257 from richardr1126/update-helm-charts
Some checks failed
CI / test (3.10) (push) Has been cancelled

Fix Helm charts health check, ingress, and values
This commit is contained in:
Fireblade2534 2025-03-26 10:11:52 -04:00 committed by GitHub
commit d0c13f6401
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 183 additions and 137 deletions

View file

@ -1,24 +1,12 @@
apiVersion: v2
name: kokoro-fastapi
description: A Helm chart for kokoro-fastapi
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
description: A Helm chart for deploying the Kokoro FastAPI TTS service to Kubernetes
type: application
version: 0.2.0
appVersion: "0.2.0"
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
keywords:
- tts
- fastapi
- gpu
- kokoro

View file

@ -0,0 +1,54 @@
# Tested on
# - Azure AKS with GPU node pool with Nvidia GPU operator
# - This setup uses 1 ingress and load balances between 2 replicas, enabling simultaneous requests
#
# Azure CLI command to create a GPU node pool:
# az aks nodepool add \
# --resource-group $AZ_RESOURCE_GROUP \
# --cluster-name $CLUSTER_NAME \
# --name t4gpus \
# --node-vm-size Standard_NC4as_T4_v3 \
# --node-count 2 \
# --enable-cluster-autoscaler \
# --min-count 1 \
# --max-count 2 \
# --priority Spot \
# --eviction-policy Delete \
# --spot-max-price -1 \
# --node-taints "sku=gpu:NoSchedule,kubernetes.azure.com/scalesetpriority=spot:NoSchedule" \
# --skip-gpu-driver-install
# Example override values for the kokoro-fastapi chart on the Azure AKS
# spot T4 GPU node pool described in the header comment of this file.
kokoroTTS:
# NOTE(review): the header comment mentions load balancing between 2 replicas,
# while this requests 8 — confirm which figure is intended.
replicaCount: 8
# Port exposed for the TTS service (the chart default in values.yaml is also 8880).
port: 8880
# Pinned image tag (presumably of the image set by kokoroTTS.repository — verify
# against the deployment template).
tag: v0.2.0
pullPolicy: IfNotPresent
# Azure specific settings for spot t4 GPU nodes with Nvidia GPU operator
# These tolerations correspond to the two taints passed via --node-taints in the
# az aks nodepool command in the header comment.
tolerations:
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
# Ingress settings for this example: enabled, using the "nginx" ingress class.
ingress:
enabled: true
className: "nginx"
annotations:
# Requires cert-manager and external-dns to be in the cluster for TLS and DNS
cert-manager.io/cluster-issuer: letsencrypt-prod
external-dns.alpha.kubernetes.io/hostname: your-external-dns-enabled-hostname
external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
# "your-external-dns-enabled-hostname" is a placeholder; the same value appears
# in the annotation above, the host rule, and the TLS hosts list below.
hosts:
- host: your-external-dns-enabled-hostname
paths:
- path: /
pathType: Prefix
# TLS for the same hostname, using the secret named kokoro-fastapi-tls
# (presumably populated by cert-manager via the cluster-issuer annotation — confirm).
tls:
- secretName: kokoro-fastapi-tls
hosts:
- your-external-dns-enabled-hostname

View file

@ -0,0 +1,56 @@
# Follow the official NVIDIA GPU Operator documentation
# to install the GPU operator with these settings:
# https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html
#
# This example is for an NVIDIA T4 16 GB GPU node pool with only 1 GPU on each node on Azure AKS.
# It uses time-slicing to share the GPU and advertise to the cluster that 1 GPU is 4 GPUs.
# So each pod gets a share of the GPU, roughly 4 GB of memory if all 4 are in use (time-slicing does not enforce memory limits).
#
# Example values for the NVIDIA GPU Operator chart: a device-plugin time-slicing
# config, plus tolerations for the operator daemonsets and node-feature-discovery.
# The device-plugin config below is created under the name "time-slicing-config";
# its "any" entry is selected as the default and lists nvidia.com/gpu with 4 replicas.
devicePlugin: # Remove this if you don't want to use time-slicing
config:
create: true
name: "time-slicing-config"
default: "any"
data:
any: |-
version: v1
flags:
migStrategy: none
sharing:
timeSlicing:
resources:
- name: nvidia.com/gpu
replicas: 4
daemonsets: # tolerations for taints keyed "sku" and "kubernetes.azure.com/scalesetpriority"
tolerations:
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule
# node-feature-discovery master and worker carry the same two tolerations as the
# daemonsets section above.
node-feature-discovery:
master:
tolerations:
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule
worker:
tolerations:
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule

View file

@ -13,10 +13,10 @@
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
echo http://$SERVICE_IP:{{ .Values.kokoroTTS.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
echo "Visit http://127.0.0.1:8880 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8880:$CONTAINER_PORT
{{- end }}

View file

@ -1,82 +1,43 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "kokoro-fastapi.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- $rewriteTargets := (list) -}}
{{- with .Values.ingress.host }}
{{- range .endpoints }}
{{- $serviceName := default $fullName .serviceName -}}
{{- $rewrite := .rewrite | default "none" -}}
{{- if not (has $rewrite $rewriteTargets ) -}}
{{- $rewriteTargets = append $rewriteTargets $rewrite -}}
{{- end -}}
{{- end}}
{{- end }}
{{- range $key := $rewriteTargets }}
{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
{{- if eq $key "none" }}
name: {{ $fullName }}
{{- else }}
name: {{ $fullName }}-{{ $expandedRewrite }}
{{- end }}
name: {{ include "kokoro-fastapi.fullname" . }}
labels:
{{- include "kokoro-fastapi.labels" $ | nindent 4 }}
{{- if ne $key "none" }}
{{- include "kokoro-fastapi.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2
{{- end }}
spec:
{{- if $.Values.ingress.tls }}
tls:
{{- range $.Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- with .Values.ingress.className }}
ingressClassName: {{ . }}
{{- end }}
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
{{- end }}
rules:
{{- with $.Values.ingress.host }}
- host: {{ .name | quote }}
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .endpoints }}
{{- $serviceName := default $fullName .serviceName -}}
{{- $servicePort := default (print "http") .servicePort -}}
{{- if eq ( .rewrite | default "none" ) $key }}
{{- range .paths }}
{{- if not (contains "@" .) }}
{{- if eq $key "none" }}
- path: {{ . }}
{{- else }}
- path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*)
{{- end }}
pathType: Prefix
backend:
service:
name: "{{ $fullName }}-{{ $serviceName }}"
port:
number: {{ $servicePort }}
{{- else }}
{{- $path := . -}}
{{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}}
{{- range $count, $e := until ($replicaCount|int) }}
- path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*)
pathType: Prefix
backend:
service:
name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}"
port:
number: {{ $servicePort }}
{{- end }}
{{- end }}
{{- range .paths }}
- path: {{ .path }}
{{- with .pathType }}
pathType: {{ . }}
{{- end }}
backend:
service:
name: {{ include "kokoro-fastapi.fullname" $ }}-kokoro-tts-service
port:
number: {{ $.Values.kokoroTTS.port }}
{{- end }}
{{- end }}
{{- end }}
---
{{- end }}
{{- end }}
{{- end }}

View file

@ -20,7 +20,7 @@ spec:
labels:
{{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }}
spec:
{{- with .Values.images.imagePullSecrets }}
{{- with .Values.kokoroTTS.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
@ -49,10 +49,16 @@ spec:
httpGet:
path: /health
port: kokoro-tts-http
initialDelaySeconds: 30
periodSeconds: 30
timeoutSeconds: 5
readinessProbe:
httpGet:
path: /health
port: kokoro-tts-http
initialDelaySeconds: 30
periodSeconds: 30
timeoutSeconds: 5
resources:
{{- toYaml .Values.kokoroTTS.resources | nindent 12 }}
volumeMounts: []

View file

@ -11,5 +11,5 @@ spec:
- name: wget
image: busybox
command: ['wget']
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}']
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.kokoroTTS.port }}']
restartPolicy: Never

View file

@ -1,12 +1,19 @@
# Default values for kokoro-fastapi.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
replicaCount: 1
images:
pullPolicy: "Always"
imagePullSecrets: [ ]
kokoroTTS:
replicaCount: 1
# The name of the deployment repository
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
imagePullSecrets: [] # Set if using a private image or getting rate limited
tag: "latest"
pullPolicy: Always
port: 8880
resources:
limits:
nvidia.com/gpu: 1
requests:
nvidia.com/gpu: 1
nameOverride: ""
fullnameOverride: ""
@ -38,47 +45,21 @@ service:
ingress:
enabled: false
className: ""
className: "nginx"
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
host:
name: kokoro.example.com
endpoints:
- paths:
- "/"
serviceName: "fastapi"
servicePort: 8880
# cert-manager.io/cluster-issuer: letsencrypt-prod
# external-dns.alpha.kubernetes.io/hostname: kokoro.example.com
# external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
hosts:
- host: kokoro.example.com
paths:
- path: /
pathType: Prefix
tls: []
# - secretName: chart-example-tls
# - secretName: kokoro-fastapi-tls
# hosts:
# - chart-example.local
kokoroTTS:
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
tag: "latest"
pullPolicy: Always
serviceName: "fastapi"
port: 8880
replicaCount: 1
resources:
limits:
nvidia.com/gpu: 1
requests:
nvidia.com/gpu: 1
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
# - kokoro.example.com
autoscaling:
enabled: false