mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-08-05 16:48:53 +00:00
Fix Helm charts health check, ingress, and values
This commit is contained in:
parent
8d73c90224
commit
14bd6a8118
7 changed files with 127 additions and 137 deletions
|
@ -1,24 +1,12 @@
|
||||||
apiVersion: v2
|
apiVersion: v2
|
||||||
name: kokoro-fastapi
|
name: kokoro-fastapi
|
||||||
description: A Helm chart for kokoro-fastapi
|
description: A Helm chart for deploying the Kokoro FastAPI TTS service to Kubernetes
|
||||||
|
|
||||||
# A chart can be either an 'application' or a 'library' chart.
|
|
||||||
#
|
|
||||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
|
||||||
# to be deployed.
|
|
||||||
#
|
|
||||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
|
||||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
|
||||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
|
||||||
type: application
|
type: application
|
||||||
|
version: 0.2.0
|
||||||
|
appVersion: "0.2.0"
|
||||||
|
|
||||||
# This is the chart version. This version number should be incremented each time you make changes
|
keywords:
|
||||||
# to the chart and its templates, including the app version.
|
- tts
|
||||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
- fastapi
|
||||||
version: 0.1.0
|
- gpu
|
||||||
|
- kokoro
|
||||||
# This is the version number of the application being deployed. This version number should be
|
|
||||||
# incremented each time you make changes to the application. Versions are not expected to
|
|
||||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
|
||||||
# It is recommended to use it with quotes.
|
|
||||||
appVersion: "1.16.0"
|
|
||||||
|
|
54
charts/kokoro-fastapi/aks-tls-values.yaml
Normal file
54
charts/kokoro-fastapi/aks-tls-values.yaml
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
# Tested on
|
||||||
|
# - Azure AKS with GPU node pool with Nvidia GPU operator
|
||||||
|
# - This setup uses 1 ingress and load balances between 2 replicas, enabling simultaneous requests
|
||||||
|
#
|
||||||
|
# Azure CLI command to create a GPU node pool:
|
||||||
|
# az aks nodepool add \
|
||||||
|
# --resource-group $AZ_RESOURCE_GROUP \
|
||||||
|
# --cluster-name $CLUSTER_NAME \
|
||||||
|
# --name t4gpus \
|
||||||
|
# --node-vm-size Standard_NC4as_T4_v3 \
|
||||||
|
# --node-count 2 \
|
||||||
|
# --enable-cluster-autoscaler \
|
||||||
|
# --min-count 1 \
|
||||||
|
# --max-count 2 \
|
||||||
|
# --priority Spot \
|
||||||
|
# --eviction-policy Delete \
|
||||||
|
# --spot-max-price -1 \
|
||||||
|
# --node-taints "sku=gpu:NoSchedule,kubernetes.azure.com/scalesetpriority=spot:NoSchedule" \
|
||||||
|
# --skip-gpu-driver-install
|
||||||
|
|
||||||
|
kokoroTTS:
|
||||||
|
replicaCount: 2
|
||||||
|
port: 8880
|
||||||
|
tag: v0.2.0
|
||||||
|
pullPolicy: IfNotPresent
|
||||||
|
|
||||||
|
# Azure-specific settings for Spot T4 GPU nodes running the NVIDIA GPU Operator
|
||||||
|
tolerations:
|
||||||
|
- key: "kubernetes.azure.com/scalesetpriority"
|
||||||
|
operator: Equal
|
||||||
|
value: "spot"
|
||||||
|
effect: NoSchedule
|
||||||
|
- key: "sku"
|
||||||
|
operator: Equal
|
||||||
|
value: "gpu"
|
||||||
|
effect: NoSchedule
|
||||||
|
|
||||||
|
ingress:
|
||||||
|
enabled: true
|
||||||
|
className: "nginx"
|
||||||
|
annotations:
|
||||||
|
# Requires cert-manager and external-dns to be in the cluster for TLS and DNS
|
||||||
|
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||||
|
external-dns.alpha.kubernetes.io/hostname: your-external-dns-enabled-hostname
|
||||||
|
external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
|
||||||
|
hosts:
|
||||||
|
- host: your-external-dns-enabled-hostname
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
tls:
|
||||||
|
- secretName: kokoro-fastapi-tls
|
||||||
|
hosts:
|
||||||
|
- your-external-dns-enabled-hostname
|
|
@ -13,10 +13,10 @@
|
||||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||||
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}'
|
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}'
|
||||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
echo http://$SERVICE_IP:{{ .Values.kokoroTTS.port }}
|
||||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||||
echo "Visit http://127.0.0.1:8080 to use your application"
|
echo "Visit http://127.0.0.1:8880 to use your application"
|
||||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8880:$CONTAINER_PORT
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
|
@ -1,36 +1,21 @@
|
||||||
{{- if .Values.ingress.enabled -}}
|
{{- if .Values.ingress.enabled -}}
|
||||||
{{- $fullName := include "kokoro-fastapi.fullname" . -}}
|
|
||||||
{{- $svcPort := .Values.service.port -}}
|
|
||||||
{{- $rewriteTargets := (list) -}}
|
|
||||||
{{- with .Values.ingress.host }}
|
|
||||||
{{- range .endpoints }}
|
|
||||||
{{- $serviceName := default $fullName .serviceName -}}
|
|
||||||
{{- $rewrite := .rewrite | default "none" -}}
|
|
||||||
{{- if not (has $rewrite $rewriteTargets ) -}}
|
|
||||||
{{- $rewriteTargets = append $rewriteTargets $rewrite -}}
|
|
||||||
{{- end -}}
|
|
||||||
{{- end}}
|
|
||||||
{{- end }}
|
|
||||||
{{- range $key := $rewriteTargets }}
|
|
||||||
{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}}
|
|
||||||
apiVersion: networking.k8s.io/v1
|
apiVersion: networking.k8s.io/v1
|
||||||
kind: Ingress
|
kind: Ingress
|
||||||
metadata:
|
metadata:
|
||||||
{{- if eq $key "none" }}
|
name: {{ include "kokoro-fastapi.fullname" . }}
|
||||||
name: {{ $fullName }}
|
|
||||||
{{- else }}
|
|
||||||
name: {{ $fullName }}-{{ $expandedRewrite }}
|
|
||||||
{{- end }}
|
|
||||||
labels:
|
labels:
|
||||||
{{- include "kokoro-fastapi.labels" $ | nindent 4 }}
|
{{- include "kokoro-fastapi.labels" . | nindent 4 }}
|
||||||
{{- if ne $key "none" }}
|
{{- with .Values.ingress.annotations }}
|
||||||
annotations:
|
annotations:
|
||||||
nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2
|
{{- toYaml . | nindent 4 }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
spec:
|
spec:
|
||||||
{{- if $.Values.ingress.tls }}
|
{{- with .Values.ingress.className }}
|
||||||
|
ingressClassName: {{ . }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .Values.ingress.tls }}
|
||||||
tls:
|
tls:
|
||||||
{{- range $.Values.ingress.tls }}
|
{{- range .Values.ingress.tls }}
|
||||||
- hosts:
|
- hosts:
|
||||||
{{- range .hosts }}
|
{{- range .hosts }}
|
||||||
- {{ . | quote }}
|
- {{ . | quote }}
|
||||||
|
@ -39,44 +24,20 @@ spec:
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
rules:
|
rules:
|
||||||
{{- with $.Values.ingress.host }}
|
{{- range .Values.ingress.hosts }}
|
||||||
- host: {{ .name | quote }}
|
- host: {{ .host | quote }}
|
||||||
http:
|
http:
|
||||||
paths:
|
paths:
|
||||||
{{- range .endpoints }}
|
|
||||||
{{- $serviceName := default $fullName .serviceName -}}
|
|
||||||
{{- $servicePort := default (print "http") .servicePort -}}
|
|
||||||
{{- if eq ( .rewrite | default "none" ) $key }}
|
|
||||||
{{- range .paths }}
|
{{- range .paths }}
|
||||||
{{- if not (contains "@" .) }}
|
- path: {{ .path }}
|
||||||
{{- if eq $key "none" }}
|
{{- with .pathType }}
|
||||||
- path: {{ . }}
|
pathType: {{ . }}
|
||||||
{{- else }}
|
|
||||||
- path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*)
|
|
||||||
{{- end }}
|
{{- end }}
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
backend:
|
||||||
service:
|
service:
|
||||||
name: "{{ $fullName }}-{{ $serviceName }}"
|
name: {{ include "kokoro-fastapi.fullname" $ }}-kokoro-tts-service
|
||||||
port:
|
port:
|
||||||
number: {{ $servicePort }}
|
number: {{ $.Values.kokoroTTS.port }}
|
||||||
{{- else }}
|
|
||||||
{{- $path := . -}}
|
|
||||||
{{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}}
|
|
||||||
{{- range $count, $e := until ($replicaCount|int) }}
|
|
||||||
- path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*)
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}"
|
|
||||||
port:
|
|
||||||
number: {{ $servicePort }}
|
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
---
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ spec:
|
||||||
labels:
|
labels:
|
||||||
{{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }}
|
{{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }}
|
||||||
spec:
|
spec:
|
||||||
{{- with .Values.images.imagePullSecrets }}
|
{{- with .Values.kokoroTTS.imagePullSecrets }}
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
{{- toYaml . | nindent 8 }}
|
{{- toYaml . | nindent 8 }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
@ -49,10 +49,16 @@ spec:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /health
|
path: /health
|
||||||
port: kokoro-tts-http
|
port: kokoro-tts-http
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
timeoutSeconds: 5
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /health
|
path: /health
|
||||||
port: kokoro-tts-http
|
port: kokoro-tts-http
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
timeoutSeconds: 5
|
||||||
resources:
|
resources:
|
||||||
{{- toYaml .Values.kokoroTTS.resources | nindent 12 }}
|
{{- toYaml .Values.kokoroTTS.resources | nindent 12 }}
|
||||||
volumeMounts: []
|
volumeMounts: []
|
||||||
|
|
|
@ -11,5 +11,5 @@ spec:
|
||||||
- name: wget
|
- name: wget
|
||||||
image: busybox
|
image: busybox
|
||||||
command: ['wget']
|
command: ['wget']
|
||||||
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}']
|
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.kokoroTTS.port }}']
|
||||||
restartPolicy: Never
|
restartPolicy: Never
|
||||||
|
|
|
@ -1,12 +1,19 @@
|
||||||
# Default values for kokoro-fastapi.
|
# Default values for kokoro-fastapi.
|
||||||
# This is a YAML-formatted file.
|
# This is a YAML-formatted file.
|
||||||
# Declare variables to be passed into your templates.
|
# Declare variables to be passed into your templates.
|
||||||
|
kokoroTTS:
|
||||||
replicaCount: 1
|
replicaCount: 1
|
||||||
|
# Container image repository the deployment pulls from
|
||||||
images:
|
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
|
||||||
pullPolicy: "Always"
|
imagePullSecrets: [] # Set if using a private image or getting rate limited
|
||||||
imagePullSecrets: [ ]
|
tag: "latest"
|
||||||
|
pullPolicy: Always
|
||||||
|
port: 8880
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: 1
|
||||||
|
requests:
|
||||||
|
nvidia.com/gpu: 1
|
||||||
|
|
||||||
nameOverride: ""
|
nameOverride: ""
|
||||||
fullnameOverride: ""
|
fullnameOverride: ""
|
||||||
|
@ -38,47 +45,21 @@ service:
|
||||||
|
|
||||||
ingress:
|
ingress:
|
||||||
enabled: false
|
enabled: false
|
||||||
className: ""
|
className: "nginx"
|
||||||
annotations: {}
|
annotations: {}
|
||||||
# kubernetes.io/ingress.class: nginx
|
# cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||||
# kubernetes.io/tls-acme: "true"
|
# external-dns.alpha.kubernetes.io/hostname: kokoro.example.com
|
||||||
host:
|
# external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
|
||||||
name: kokoro.example.com
|
hosts:
|
||||||
endpoints:
|
- host: kokoro.example.com
|
||||||
- paths:
|
paths:
|
||||||
- "/"
|
- path: /
|
||||||
serviceName: "fastapi"
|
pathType: Prefix
|
||||||
servicePort: 8880
|
|
||||||
|
|
||||||
tls: []
|
tls: []
|
||||||
# - secretName: chart-example-tls
|
# - secretName: kokoro-fastapi-tls
|
||||||
# hosts:
|
# hosts:
|
||||||
# - chart-example.local
|
# - kokoro.example.com
|
||||||
|
|
||||||
kokoroTTS:
|
|
||||||
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
|
|
||||||
tag: "latest"
|
|
||||||
pullPolicy: Always
|
|
||||||
serviceName: "fastapi"
|
|
||||||
port: 8880
|
|
||||||
replicaCount: 1
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
nvidia.com/gpu: 1
|
|
||||||
requests:
|
|
||||||
nvidia.com/gpu: 1
|
|
||||||
|
|
||||||
|
|
||||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
|
||||||
# choice for the user. This also increases chances charts run on environments with little
|
|
||||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
|
||||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
|
||||||
# limits:
|
|
||||||
# cpu: 100m
|
|
||||||
# memory: 128Mi
|
|
||||||
# requests:
|
|
||||||
# cpu: 100m
|
|
||||||
# memory: 128Mi
|
|
||||||
|
|
||||||
autoscaling:
|
autoscaling:
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|
Loading…
Add table
Reference in a new issue