mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-08-05 16:48:53 +00:00
Merge pull request #257 from richardr1126/update-helm-charts
Some checks failed
CI / test (3.10) (push) Has been cancelled
Some checks failed
CI / test (3.10) (push) Has been cancelled
Fix Helm charts health check, ingress, and values
This commit is contained in:
commit
d0c13f6401
8 changed files with 183 additions and 137 deletions
|
@ -1,24 +1,12 @@
|
|||
apiVersion: v2
|
||||
name: kokoro-fastapi
|
||||
description: A Helm chart for kokoro-fastapi
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
description: A Helm chart for deploying the Kokoro FastAPI TTS service to Kubernetes
|
||||
type: application
|
||||
version: 0.2.0
|
||||
appVersion: "0.2.0"
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
||||
keywords:
|
||||
- tts
|
||||
- fastapi
|
||||
- gpu
|
||||
- kokoro
|
||||
|
|
54
charts/kokoro-fastapi/examples/aks-tls-values.yaml
Normal file
54
charts/kokoro-fastapi/examples/aks-tls-values.yaml
Normal file
|
@ -0,0 +1,54 @@
|
|||
# Tested on
|
||||
# - Azure AKS with GPU node pool with Nvidia GPU operator
|
||||
# - This setup uses 1 ingress and load balances across the kokoroTTS replicas (see replicaCount below), enabling simultaneous requests
|
||||
#
|
||||
# Azure CLI command to create a GPU node pool:
|
||||
# az aks nodepool add \
|
||||
# --resource-group $AZ_RESOURCE_GROUP \
|
||||
# --cluster-name $CLUSTER_NAME \
|
||||
# --name t4gpus \
|
||||
# --node-vm-size Standard_NC4as_T4_v3 \
|
||||
# --node-count 2 \
|
||||
# --enable-cluster-autoscaler \
|
||||
# --min-count 1 \
|
||||
# --max-count 2 \
|
||||
# --priority Spot \
|
||||
# --eviction-policy Delete \
|
||||
# --spot-max-price -1 \
|
||||
# --node-taints "sku=gpu:NoSchedule,kubernetes.azure.com/scalesetpriority=spot:NoSchedule" \
|
||||
# --skip-gpu-driver-install
|
||||
|
||||
kokoroTTS:
|
||||
replicaCount: 8
|
||||
port: 8880
|
||||
tag: v0.2.0
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
# Azure specific settings for spot t4 GPU nodes with Nvidia GPU operator
|
||||
tolerations:
|
||||
- key: "kubernetes.azure.com/scalesetpriority"
|
||||
operator: Equal
|
||||
value: "spot"
|
||||
effect: NoSchedule
|
||||
- key: "sku"
|
||||
operator: Equal
|
||||
value: "gpu"
|
||||
effect: NoSchedule
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: "nginx"
|
||||
annotations:
|
||||
# Requires cert-manager and external-dns to be in the cluster for TLS and DNS
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
external-dns.alpha.kubernetes.io/hostname: your-external-dns-enabled-hostname
|
||||
external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
|
||||
hosts:
|
||||
- host: your-external-dns-enabled-hostname
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- secretName: kokoro-fastapi-tls
|
||||
hosts:
|
||||
- your-external-dns-enabled-hostname
|
56
charts/kokoro-fastapi/examples/gpu-operator-values.yaml
Normal file
56
charts/kokoro-fastapi/examples/gpu-operator-values.yaml
Normal file
|
@ -0,0 +1,56 @@
|
|||
# Follow the official NVIDIA GPU Operator documentation
|
||||
# to install the GPU operator with these settings:
|
||||
# https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html
|
||||
#
|
||||
# This example is for an NVIDIA T4 16 GB GPU node pool with only 1 GPU on each node on Azure AKS.
|
||||
# It uses time-slicing to share the GPU, advertising each physical GPU to the cluster as 4 GPUs.
|
||||
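# So each pod effectively has access to a smaller GPU with roughly 4 GB of memory
# (NOTE: time-slicing shares the GPU but does not itself enforce per-pod memory limits).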
|
||||
#
|
||||
devicePlugin: # Remove this if you don't want to use time-slicing
|
||||
config:
|
||||
create: true
|
||||
name: "time-slicing-config"
|
||||
default: "any"
|
||||
data:
|
||||
any: |-
|
||||
version: v1
|
||||
flags:
|
||||
migStrategy: none
|
||||
sharing:
|
||||
timeSlicing:
|
||||
resources:
|
||||
- name: nvidia.com/gpu
|
||||
replicas: 4
|
||||
|
||||
daemonsets:
|
||||
tolerations:
|
||||
- key: "sku"
|
||||
operator: Equal
|
||||
value: "gpu"
|
||||
effect: NoSchedule
|
||||
- key: "kubernetes.azure.com/scalesetpriority"
|
||||
operator: Equal
|
||||
value: "spot"
|
||||
effect: NoSchedule
|
||||
|
||||
node-feature-discovery:
|
||||
master:
|
||||
tolerations:
|
||||
- key: "sku"
|
||||
operator: Equal
|
||||
value: "gpu"
|
||||
effect: NoSchedule
|
||||
- key: "kubernetes.azure.com/scalesetpriority"
|
||||
operator: Equal
|
||||
value: "spot"
|
||||
effect: NoSchedule
|
||||
worker:
|
||||
tolerations:
|
||||
- key: "sku"
|
||||
operator: Equal
|
||||
value: "gpu"
|
||||
effect: NoSchedule
|
||||
- key: "kubernetes.azure.com/scalesetpriority"
|
||||
operator: Equal
|
||||
value: "spot"
|
||||
effect: NoSchedule
|
|
@ -13,10 +13,10 @@
|
|||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
echo http://$SERVICE_IP:{{ .Values.kokoroTTS.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
echo "Visit http://127.0.0.1:8880 to use your application"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8880:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
|
|
|
@ -1,82 +1,43 @@
|
|||
{{- if .Values.ingress.enabled -}}
|
||||
{{- $fullName := include "kokoro-fastapi.fullname" . -}}
|
||||
{{- $svcPort := .Values.service.port -}}
|
||||
{{- $rewriteTargets := (list) -}}
|
||||
{{- with .Values.ingress.host }}
|
||||
{{- range .endpoints }}
|
||||
{{- $serviceName := default $fullName .serviceName -}}
|
||||
{{- $rewrite := .rewrite | default "none" -}}
|
||||
{{- if not (has $rewrite $rewriteTargets ) -}}
|
||||
{{- $rewriteTargets = append $rewriteTargets $rewrite -}}
|
||||
{{- end -}}
|
||||
{{- end}}
|
||||
{{- end }}
|
||||
{{- range $key := $rewriteTargets }}
|
||||
{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
{{- if eq $key "none" }}
|
||||
name: {{ $fullName }}
|
||||
{{- else }}
|
||||
name: {{ $fullName }}-{{ $expandedRewrite }}
|
||||
{{- end }}
|
||||
name: {{ include "kokoro-fastapi.fullname" . }}
|
||||
labels:
|
||||
{{- include "kokoro-fastapi.labels" $ | nindent 4 }}
|
||||
{{- if ne $key "none" }}
|
||||
{{- include "kokoro-fastapi.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if $.Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range $.Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.ingress.className }}
|
||||
ingressClassName: {{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- with $.Values.ingress.host }}
|
||||
- host: {{ .name | quote }}
|
||||
{{- range .Values.ingress.hosts }}
|
||||
- host: {{ .host | quote }}
|
||||
http:
|
||||
paths:
|
||||
{{- range .endpoints }}
|
||||
{{- $serviceName := default $fullName .serviceName -}}
|
||||
{{- $servicePort := default (print "http") .servicePort -}}
|
||||
{{- if eq ( .rewrite | default "none" ) $key }}
|
||||
{{- range .paths }}
|
||||
{{- if not (contains "@" .) }}
|
||||
{{- if eq $key "none" }}
|
||||
- path: {{ . }}
|
||||
{{- else }}
|
||||
- path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*)
|
||||
{{- end }}
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: "{{ $fullName }}-{{ $serviceName }}"
|
||||
port:
|
||||
number: {{ $servicePort }}
|
||||
{{- else }}
|
||||
{{- $path := . -}}
|
||||
{{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}}
|
||||
{{- range $count, $e := until ($replicaCount|int) }}
|
||||
- path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*)
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}"
|
||||
port:
|
||||
number: {{ $servicePort }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- range .paths }}
|
||||
- path: {{ .path }}
|
||||
{{- with .pathType }}
|
||||
pathType: {{ . }}
|
||||
{{- end }}
|
||||
backend:
|
||||
service:
|
||||
name: {{ include "kokoro-fastapi.fullname" $ }}-kokoro-tts-service
|
||||
port:
|
||||
number: {{ $.Values.kokoroTTS.port }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
---
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
|
|
@ -20,7 +20,7 @@ spec:
|
|||
labels:
|
||||
{{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }}
|
||||
spec:
|
||||
{{- with .Values.images.imagePullSecrets }}
|
||||
{{- with .Values.kokoroTTS.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
|
@ -49,10 +49,16 @@ spec:
|
|||
httpGet:
|
||||
path: /health
|
||||
port: kokoro-tts-http
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: kokoro-tts-http
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 5
|
||||
resources:
|
||||
{{- toYaml .Values.kokoroTTS.resources | nindent 12 }}
|
||||
volumeMounts: []
|
||||
|
|
|
@ -11,5 +11,5 @@ spec:
|
|||
- name: wget
|
||||
image: busybox
|
||||
command: ['wget']
|
||||
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}']
|
||||
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.kokoroTTS.port }}']
|
||||
restartPolicy: Never
|
||||
|
|
|
@ -1,12 +1,19 @@
|
|||
# Default values for kokoro-fastapi.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
replicaCount: 1
|
||||
|
||||
images:
|
||||
pullPolicy: "Always"
|
||||
imagePullSecrets: [ ]
|
||||
kokoroTTS:
|
||||
replicaCount: 1
|
||||
# The name of the deployment repository
|
||||
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
|
||||
imagePullSecrets: [] # Set if using a private image or getting rate limited
|
||||
tag: "latest"
|
||||
pullPolicy: Always
|
||||
port: 8880
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 1
|
||||
requests:
|
||||
nvidia.com/gpu: 1
|
||||
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
|
@ -38,47 +45,21 @@ service:
|
|||
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
className: "nginx"
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
host:
|
||||
name: kokoro.example.com
|
||||
endpoints:
|
||||
- paths:
|
||||
- "/"
|
||||
serviceName: "fastapi"
|
||||
servicePort: 8880
|
||||
# cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
# external-dns.alpha.kubernetes.io/hostname: kokoro.example.com
|
||||
# external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
|
||||
hosts:
|
||||
- host: kokoro.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# - secretName: kokoro-fastapi-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
kokoroTTS:
|
||||
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
|
||||
tag: "latest"
|
||||
pullPolicy: Always
|
||||
serviceName: "fastapi"
|
||||
port: 8880
|
||||
replicaCount: 1
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 1
|
||||
requests:
|
||||
nvidia.com/gpu: 1
|
||||
|
||||
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# - kokoro.example.com
|
||||
|
||||
autoscaling:
|
||||
enabled: false
|
||||
|
|
Loading…
Add table
Reference in a new issue