diff --git a/charts/kokoro-fastapi/Chart.yaml b/charts/kokoro-fastapi/Chart.yaml index bd0cf5d..0f79d40 100644 --- a/charts/kokoro-fastapi/Chart.yaml +++ b/charts/kokoro-fastapi/Chart.yaml @@ -1,24 +1,12 @@ apiVersion: v2 name: kokoro-fastapi -description: A Helm chart for kokoro-fastapi - -# A chart can be either an 'application' or a 'library' chart. -# -# Application charts are a collection of templates that can be packaged into versioned archives -# to be deployed. -# -# Library charts provide useful utilities or functions for the chart developer. They're included as -# a dependency of application charts to inject those utilities and functions into the rendering -# pipeline. Library charts do not define any templates and therefore cannot be deployed. +description: A Helm chart for deploying the Kokoro FastAPI TTS service to Kubernetes type: application +version: 0.2.0 +appVersion: "0.2.0" -# This is the chart version. This version number should be incremented each time you make changes -# to the chart and its templates, including the app version. -# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 - -# This is the version number of the application being deployed. This version number should be -# incremented each time you make changes to the application. Versions are not expected to -# follow Semantic Versioning. They should reflect the version the application is using. -# It is recommended to use it with quotes. -appVersion: "1.16.0" +keywords: + - tts + - fastapi + - gpu + - kokoro diff --git a/charts/kokoro-fastapi/aks-tls-values.yaml b/charts/kokoro-fastapi/aks-tls-values.yaml new file mode 100644 index 0000000..2a6d13d --- /dev/null +++ b/charts/kokoro-fastapi/aks-tls-values.yaml @@ -0,0 +1,54 @@ +# Tested on +# - Azure AKS with GPU node pool with Nvidia GPU operator +# - This setup uses 1 ingress and load balances between 2 replicas, enabling simultaneous requests +# +# Azure CLI command to create a GPU node pool: +# az aks nodepool add \ +# --resource-group $AZ_RESOURCE_GROUP \ +# --cluster-name $CLUSTER_NAME \ +# --name t4gpus \ +# --node-vm-size Standard_NC4as_T4_v3 \ +# --node-count 2 \ +# --enable-cluster-autoscaler \ +# --min-count 1 \ +# --max-count 2 \ +# --priority Spot \ +# --eviction-policy Delete \ +# --spot-max-price -1 \ +# --node-taints "sku=gpu:NoSchedule,kubernetes.azure.com/scalesetpriority=spot:NoSchedule" \ +# --skip-gpu-driver-install + +kokoroTTS: + replicaCount: 2 + port: 8880 + tag: v0.2.0 + pullPolicy: IfNotPresent + +# Azure specific settings for spot t4 GPU nodes with Nvidia GPU operator +tolerations: + - key: "kubernetes.azure.com/scalesetpriority" + operator: Equal + value: "spot" + effect: NoSchedule + - key: "sku" + operator: Equal + value: "gpu" + effect: NoSchedule + +ingress: + enabled: true + className: "nginx" + annotations: + # Requires cert-manager and external-dns to be in the cluster for TLS and DNS + cert-manager.io/cluster-issuer: letsencrypt-prod + external-dns.alpha.kubernetes.io/hostname: your-external-dns-enabled-hostname + external-dns.alpha.kubernetes.io/cloudflare-proxied: "false" + hosts: + - host: your-external-dns-enabled-hostname + paths: + - path: / + pathType: Prefix + tls: + - secretName: kokoro-fastapi-tls + hosts: + - your-external-dns-enabled-hostname \ No newline at end of file diff --git a/charts/kokoro-fastapi/templates/NOTES.txt b/charts/kokoro-fastapi/templates/NOTES.txt index 88b8980..bc009b8 100644 --- a/charts/kokoro-fastapi/templates/NOTES.txt +++ b/charts/kokoro-fastapi/templates/NOTES.txt @@ -13,10 +13,10 @@ NOTE: It may take a few minutes for the LoadBalancer IP to be available. You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}' export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") - echo http://$SERVICE_IP:{{ .Values.service.port }} + echo http://$SERVICE_IP:{{ .Values.kokoroTTS.port }} {{- else if contains "ClusterIP" .Values.service.type }} export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") - echo "Visit http://127.0.0.1:8080 to use your application" - kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT + echo "Visit http://127.0.0.1:8880 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8880:$CONTAINER_PORT {{- end }} diff --git a/charts/kokoro-fastapi/templates/ingress.yaml b/charts/kokoro-fastapi/templates/ingress.yaml index 09a8fb5..a9c9f4e 100644 --- a/charts/kokoro-fastapi/templates/ingress.yaml +++ b/charts/kokoro-fastapi/templates/ingress.yaml @@ -1,82 +1,43 @@ {{- if .Values.ingress.enabled -}} -{{- $fullName := include "kokoro-fastapi.fullname" . -}} -{{- $svcPort := .Values.service.port -}} -{{- $rewriteTargets := (list) -}} -{{- with .Values.ingress.host }} - {{- range .endpoints }} - {{- $serviceName := default $fullName .serviceName -}} - {{- $rewrite := .rewrite | default "none" -}} - {{- if not (has $rewrite $rewriteTargets ) -}} - {{- $rewriteTargets = append $rewriteTargets $rewrite -}} - {{- end -}} - {{- end}} -{{- end }} -{{- range $key := $rewriteTargets }} -{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: -{{- if eq $key "none" }} - name: {{ $fullName }} -{{- else }} - name: {{ $fullName }}-{{ $expandedRewrite }} -{{- end }} + name: {{ include "kokoro-fastapi.fullname" . }} labels: - {{- include "kokoro-fastapi.labels" $ | nindent 4 }} - {{- if ne $key "none" }} + {{- include "kokoro-fastapi.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} annotations: - nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2 - {{- end }} -spec: -{{- if $.Values.ingress.tls }} - tls: - {{- range $.Values.ingress.tls }} - - hosts: - {{- range .hosts }} - - {{ . | quote }} - {{- end }} - secretName: {{ .secretName }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.ingress.className }} + ingressClassName: {{ . }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} {{- end }} -{{- end }} rules: - {{- with $.Values.ingress.host }} - - host: {{ .name | quote }} + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} http: paths: - {{- range .endpoints }} - {{- $serviceName := default $fullName .serviceName -}} - {{- $servicePort := default (print "http") .servicePort -}} - {{- if eq ( .rewrite | default "none" ) $key }} - {{- range .paths }} - {{- if not (contains "@" .) }} - {{- if eq $key "none" }} - - path: {{ . }} - {{- else }} - - path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*) - {{- end }} - pathType: Prefix - backend: - service: - name: "{{ $fullName }}-{{ $serviceName }}" - port: - number: {{ $servicePort }} - {{- else }} - {{- $path := . -}} - {{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}} - {{- range $count, $e := until ($replicaCount|int) }} - - path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*) - pathType: Prefix - backend: - service: - name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}" - port: - number: {{ $servicePort }} - {{- end }} - {{- end }} + {{- range .paths }} + - path: {{ .path }} + {{- with .pathType }} + pathType: {{ . }} {{- end }} + backend: + service: + name: {{ include "kokoro-fastapi.fullname" $ }}-kokoro-tts-service + port: + number: {{ $.Values.kokoroTTS.port }} {{- end }} - {{- end }} - {{- end }} ---- -{{- end }} + {{- end }} {{- end }} diff --git a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml index be1f67b..2178a08 100644 --- a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml +++ b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml @@ -20,7 +20,7 @@ spec: labels: {{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }} spec: - {{- with .Values.images.imagePullSecrets }} + {{- with .Values.kokoroTTS.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} @@ -49,10 +49,16 @@ spec: httpGet: path: /health port: kokoro-tts-http + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 readinessProbe: httpGet: path: /health port: kokoro-tts-http + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 resources: {{- toYaml .Values.kokoroTTS.resources | nindent 12 }} volumeMounts: [] diff --git a/charts/kokoro-fastapi/templates/tests/test-connection.yaml b/charts/kokoro-fastapi/templates/tests/test-connection.yaml index 120583f..8b912c6 100644 --- a/charts/kokoro-fastapi/templates/tests/test-connection.yaml +++ b/charts/kokoro-fastapi/templates/tests/test-connection.yaml @@ -11,5 +11,5 @@ spec: - name: wget image: busybox command: ['wget'] - args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}'] + args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.kokoroTTS.port }}'] restartPolicy: Never diff --git a/charts/kokoro-fastapi/values.yaml b/charts/kokoro-fastapi/values.yaml index 0db2f95..e2e37e4 100644 --- a/charts/kokoro-fastapi/values.yaml +++ b/charts/kokoro-fastapi/values.yaml @@ -1,12 +1,19 @@ # Default values for kokoro-fastapi. # This is a YAML-formatted file. # Declare variables to be passed into your templates. - -replicaCount: 1 - -images: - pullPolicy: "Always" - imagePullSecrets: [ ] +kokoroTTS: + replicaCount: 1 + # The name of the deployment repository + repository: "ghcr.io/remsky/kokoro-fastapi-gpu" + imagePullSecrets: [] # Set if using a private image or getting rate limited + tag: "latest" + pullPolicy: Always + port: 8880 + resources: + limits: + nvidia.com/gpu: 1 + requests: + nvidia.com/gpu: 1 nameOverride: "" fullnameOverride: "" @@ -38,47 +45,21 @@ service: ingress: enabled: false - className: "" + className: "nginx" annotations: {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - host: - name: kokoro.example.com - endpoints: - - paths: - - "/" - serviceName: "fastapi" - servicePort: 8880 + # cert-manager.io/cluster-issuer: letsencrypt-prod + # external-dns.alpha.kubernetes.io/hostname: kokoro.example.com + # external-dns.alpha.kubernetes.io/cloudflare-proxied: "false" + hosts: + - host: kokoro.example.com + paths: + - path: / + pathType: Prefix tls: [] - # - secretName: chart-example-tls + # - secretName: kokoro-fastapi-tls # hosts: - # - chart-example.local - -kokoroTTS: - repository: "ghcr.io/remsky/kokoro-fastapi-gpu" - tag: "latest" - pullPolicy: Always - serviceName: "fastapi" - port: 8880 - replicaCount: 1 - resources: - limits: - nvidia.com/gpu: 1 - requests: - nvidia.com/gpu: 1 - - - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi + # - kokoro.example.com autoscaling: enabled: false