From d712308f9866dd4886aa3f984422f8e6b43982b7 Mon Sep 17 00:00:00 2001 From: Fireblade2534 Date: Fri, 21 Mar 2025 18:03:09 +0000 Subject: [PATCH 1/3] Fixes relating to parsing money and tests. Also readme stuff --- README.md | 31 +++++++++++- api/src/routers/openai_compatible.py | 4 +- api/src/services/streaming_audio_writer.py | 2 +- .../services/text_processing/normalizer.py | 48 ++++++++++++------- .../text_processing/text_processor.py | 1 + api/src/services/tts_service.py | 2 +- api/tests/test_kokoro_v1.py | 19 ++++---- api/tests/test_normalizer.py | 7 ++- api/tests/test_text_processor.py | 15 +++++- Test Threads.py => dev/Test Threads.py | 0 Test copy.py => dev/Test copy.py | 0 dev/Test money.py | 26 ++++++++++ dev/Test num.py | 45 +++++++++++++++++ Test.py => dev/Test.py | 0 pyproject.toml | 1 + 15 files changed, 165 insertions(+), 36 deletions(-) rename Test Threads.py => dev/Test Threads.py (100%) rename Test copy.py => dev/Test copy.py (100%) create mode 100644 dev/Test money.py create mode 100644 dev/Test num.py rename Test.py => dev/Test.py (100%) diff --git a/README.md b/README.md index 6859163..5318b03 100644 --- a/README.md +++ b/README.md @@ -516,7 +516,36 @@ Monitor system state and resource usage with these endpoints: Useful for debugging resource exhaustion or performance issues. -## Known Issues +## Known Issues & Troubleshooting + +
+Missing words & Missing some timestamps + +The API will automatically do text normalization on input text which may incorrectly remove or change some phrases. This can be disabled by adding `"normalization_options":{"normalize": false}` to your request JSON: +```python +import requests + +response = requests.post( + "http://localhost:8880/v1/audio/speech", + json={ + "input": "Hello world!", + "voice": "af_heart", + "response_format": "pcm", + "normalization_options": + { + "normalize": False + } + }, + stream=True +) + +for chunk in response.iter_content(chunk_size=1024): + if chunk: + # Process streaming chunks + pass +``` + +
Versioning & Development diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py index 742c216..1e89151 100644 --- a/api/src/routers/openai_compatible.py +++ b/api/src/routers/openai_compatible.py @@ -125,20 +125,18 @@ async def process_and_validate_voices(voice_input: Union[str, List[str]], tts_se async def stream_audio_chunks(tts_service: TTSService, request: Union[OpenAISpeechRequest, CaptionedSpeechRequest], client_request: Request, writer: StreamingAudioWriter) -> AsyncGenerator[AudioChunk, None]: """Stream audio chunks as they're generated with client disconnect handling""" voice_name = await process_and_validate_voices(request.voice, tts_service) - unique_properties = {"return_timestamps": False} if hasattr(request, "return_timestamps"): unique_properties["return_timestamps"] = request.return_timestamps try: - logger.info(f"Starting audio generation with lang_code: {request.lang_code}") async for chunk_data in tts_service.generate_audio_stream( text=request.input, voice=voice_name, writer=writer, speed=request.speed, output_format=request.response_format, - lang_code=request.lang_code or settings.default_voice_code or voice_name[0].lower(), + lang_code=request.lang_code, normalization_options=request.normalization_options, return_timestamps=unique_properties["return_timestamps"], ): diff --git a/api/src/services/streaming_audio_writer.py b/api/src/services/streaming_audio_writer.py index 763c5eb..75d87b4 100644 --- a/api/src/services/streaming_audio_writer.py +++ b/api/src/services/streaming_audio_writer.py @@ -25,7 +25,7 @@ class StreamingAudioWriter: if self.format in ["wav","flac","mp3","pcm","aac","opus"]: if self.format != "pcm": self.output_buffer = BytesIO() - self.container = av.open(self.output_buffer, mode="w", format=self.format) + self.container = av.open(self.output_buffer, mode="w", format=self.format if self.format != "aac" else "adts") self.stream = 
self.container.add_stream(codec_map[self.format],sample_rate=self.sample_rate,layout='mono' if self.channels == 1 else 'stereo') self.stream.bit_rate = 128000 else: diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py index 84c3694..0acfe4d 100644 --- a/api/src/services/text_processing/normalizer.py +++ b/api/src/services/text_processing/normalizer.py @@ -8,9 +8,11 @@ import re from functools import lru_cache import inflect from numpy import number - +from torch import mul from ...structures.schemas import NormalizationOptions +from text_to_num import text2num + # Constants VALID_TLDS = [ "com", @@ -134,25 +136,35 @@ def handle_units(u: re.Match[str]) -> str: unit[0]=INFLECT_ENGINE.no(unit[0],number) return " ".join(unit) +def conditional_int(number: float, threshold: float = 0.00001): + if abs(round(number) - number) < threshold: + return int(round(number)) + return number + def handle_money(m: re.Match[str]) -> str: """Convert money expressions to spoken form""" - m = m.group() - bill = "dollar" if m[0] == "$" else "pound" - if m[-1].isalpha(): - return f"{INFLECT_ENGINE.number_to_words(m[1:])} {bill}s" - elif "." 
not in m: - s = "" if m[1:] == "1" else "s" - return f"{INFLECT_ENGINE.number_to_words(m[1:])} {bill}{s}" - b, c = m[1:].split(".") - s = "" if b == "1" else "s" - c = int(c.ljust(2, "0")) - coins = ( - f"cent{'' if c == 1 else 's'}" - if m[0] == "$" - else ("penny" if c == 1 else "pence") - ) - return f"{INFLECT_ENGINE.number_to_words(b)} {bill}{s} and {INFLECT_ENGINE.number_to_words(c)} {coins}" + bill = "dollar" if m.group(2) == "$" else "pound" + coin = "cent" if m.group(2) == "$" else "pence" + number = m.group(3) + + multiplier = m.group(4) + try: + number = float(number) + except: + return m.group() + + if m.group(1) == "-": + number *= -1 + + if number % 1 == 0 or multiplier != "": + text_number = f"{INFLECT_ENGINE.number_to_words(conditional_int(number))}{multiplier} {INFLECT_ENGINE.plural(bill, count=number)}" + else: + sub_number = int(str(number).split(".")[-1].ljust(2, "0")) + + text_number = f"{INFLECT_ENGINE.number_to_words(int(round(number)))} {INFLECT_ENGINE.plural(bill, count=number)} and {INFLECT_ENGINE.number_to_words(sub_number)} {INFLECT_ENGINE.plural(coin, count=sub_number)}" + + return text_number def handle_decimal(num: re.Match[str]) -> str: """Convert decimal numbers to spoken form""" @@ -297,7 +309,7 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str text = re.sub(r"(?<=\d),(?=\d)", "", text) text = re.sub( - r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b", + r"(?i)(-?)([$£])(\d+(?:\.\d+)?)((?: hundred| thousand| (?:[bm]|tr|quadr)illion)*)\b", handle_money, text, ) diff --git a/api/src/services/text_processing/text_processor.py b/api/src/services/text_processing/text_processor.py index 0d8d36c..0bd4658 100644 --- a/api/src/services/text_processing/text_processor.py +++ b/api/src/services/text_processing/text_processor.py @@ -134,6 +134,7 @@ async def smart_split( # Normalize text if settings.advanced_text_normalization and normalization_options.normalize: + 
print(lang_code) if lang_code in ["a","b","en-us","en-gb"]: text = CUSTOM_PHONEMES.sub(lambda s: handle_custom_phonemes(s, custom_phoneme_list), text) text=normalize_text(text,normalization_options) diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py index f740a29..8a6bb42 100644 --- a/api/src/services/tts_service.py +++ b/api/src/services/tts_service.py @@ -258,7 +258,7 @@ class TTSService: logger.info(f"Using lang_code '{pipeline_lang_code}' for voice '{voice_name}' in audio stream") # Process text in chunks with smart splitting - async for chunk_text, tokens in smart_split(text, lang_code=lang_code, normalization_options=normalization_options): + async for chunk_text, tokens in smart_split(text, lang_code=pipeline_lang_code, normalization_options=normalization_options): try: # Process audio for chunk async for chunk_data in self._process_chunk( diff --git a/api/tests/test_kokoro_v1.py b/api/tests/test_kokoro_v1.py index 850ed05..29d83c5 100644 --- a/api/tests/test_kokoro_v1.py +++ b/api/tests/test_kokoro_v1.py @@ -23,19 +23,18 @@ def test_initial_state(kokoro_backend): @patch("torch.cuda.is_available", return_value=True) -@patch("torch.cuda.memory_allocated") +@patch("torch.cuda.memory_allocated", return_value=5e9) def test_memory_management(mock_memory, mock_cuda, kokoro_backend): """Test GPU memory management functions.""" - # Mock GPU memory usage - mock_memory.return_value = 5e9 # 5GB + # Patch backend so it thinks we have cuda + with patch.object(kokoro_backend, "_device", "cuda"): + # Test memory check + with patch("api.src.inference.kokoro_v1.model_config") as mock_config: + mock_config.pytorch_gpu.memory_threshold = 4 + assert kokoro_backend._check_memory() == True - # Test memory check - with patch("api.src.inference.kokoro_v1.model_config") as mock_config: - mock_config.pytorch_gpu.memory_threshold = 4 - assert kokoro_backend._check_memory() == True - - mock_config.pytorch_gpu.memory_threshold = 6 - assert 
kokoro_backend._check_memory() == False + mock_config.pytorch_gpu.memory_threshold = 6 + assert kokoro_backend._check_memory() == False @patch("torch.cuda.empty_cache") diff --git a/api/tests/test_normalizer.py b/api/tests/test_normalizer.py index 0aa963e..6dd4342 100644 --- a/api/tests/test_normalizer.py +++ b/api/tests/test_normalizer.py @@ -83,7 +83,12 @@ def test_url_email_addresses(): == "Send to test dot user at site dot com" ) - +def test_money(): + """Test that money text is normalized correctly""" + assert normalize_text("He lost $5.3 thousand.",normalization_options=NormalizationOptions()) == "He lost five point three thousand dollars." + assert normalize_text("To put it weirdly -$6.9 million",normalization_options=NormalizationOptions()) == "To put it weirdly minus six point nine million dollars" + assert normalize_text("It costs $50.3.",normalization_options=NormalizationOptions()) == "It costs fifty dollars and thirty cents." + def test_non_url_text(): """Test that non-URL text is unaffected""" assert normalize_text("This is not.a.url text",normalization_options=NormalizationOptions()) == "This is not-a-url text" diff --git a/api/tests/test_text_processor.py b/api/tests/test_text_processor.py index 3d844b1..7e5fb0f 100644 --- a/api/tests/test_text_processor.py +++ b/api/tests/test_text_processor.py @@ -34,7 +34,7 @@ def test_process_text_chunk_phonemes(): def test_get_sentence_info(): """Test sentence splitting and info extraction.""" text = "This is sentence one. This is sentence two! What about three?" - results = get_sentence_info(text) + results = get_sentence_info(text, {}) assert len(results) == 3 for sentence, tokens, count in results: @@ -44,6 +44,19 @@ def test_get_sentence_info(): assert count == len(tokens) assert count > 0 +def test_get_sentence_info_phenomoes(): + """Test sentence splitting and info extraction.""" + text = "This is sentence one. This is two! What about three?" 
+ results = get_sentence_info(text, {"": r"sˈɛntᵊns"}) + + assert len(results) == 3 + assert "sˈɛntᵊns" in results[1][0] + for sentence, tokens, count in results: + assert isinstance(sentence, str) + assert isinstance(tokens, list) + assert isinstance(count, int) + assert count == len(tokens) + assert count > 0 @pytest.mark.asyncio async def test_smart_split_short_text(): diff --git a/Test Threads.py b/dev/Test Threads.py similarity index 100% rename from Test Threads.py rename to dev/Test Threads.py diff --git a/Test copy.py b/dev/Test copy.py similarity index 100% rename from Test copy.py rename to dev/Test copy.py diff --git a/dev/Test money.py b/dev/Test money.py new file mode 100644 index 0000000..4956070 --- /dev/null +++ b/dev/Test money.py @@ -0,0 +1,26 @@ +import requests +import base64 +import json + +text="""the administration has offered up a platter of repression for more than a year and is still slated to lose $400 million. + +Columbia is the largest private landowner in New York City and boasts an endowment of $14.8 billion;""" + + +Type="wav" + +response = requests.post( + "http://localhost:8880/v1/audio/speech", + json={ + "model": "kokoro", + "input": text, + "voice": "af_heart+af_sky", + "speed": 1.0, + "response_format": Type, + "stream": False, + }, + stream=True +) + +with open(f"outputnostreammoney.{Type}", "wb") as f: + f.write(response.content) diff --git a/dev/Test num.py b/dev/Test num.py new file mode 100644 index 0000000..15bd4d7 --- /dev/null +++ b/dev/Test num.py @@ -0,0 +1,45 @@ +from text_to_num import text2num +import re +import inflect +from torch import mul + +INFLECT_ENGINE = inflect.engine() + + +def conditional_int(number: float, threshold: float = 0.00001): + if abs(round(number) - number) < threshold: + return int(round(number)) + return number + +def handle_money(m: re.Match[str]) -> str: + """Convert money expressions to spoken form""" + + bill = "dollar" if m.group(2) == "$" else "pound" + coin = "cent" if m.group(2) == 
"$" else "pence" + number = m.group(3) + + multiplier = m.group(4) + try: + number = float(number) + except: + return m.group() + + if m.group(1) == "-": + number *= -1 + + if number % 1 == 0 or multiplier != "": + text_number = f"{INFLECT_ENGINE.number_to_words(conditional_int(number))}{multiplier} {INFLECT_ENGINE.plural(bill, count=number)}" + else: + sub_number = int(str(number).split(".")[-1].ljust(2, "0")) + + text_number = f"{INFLECT_ENGINE.number_to_words(int(round(number)))} {INFLECT_ENGINE.plural(bill, count=number)} and {INFLECT_ENGINE.number_to_words(sub_number)} {INFLECT_ENGINE.plural(coin, count=sub_number)}" + + return text_number + + +text = re.sub( + r"(?i)(-?)([$£])(\d+(?:\.\d+)?)((?: hundred| thousand| (?:[bm]|tr|quadr)illion)*)\b", + handle_money, + "he administration has offered up a platter of repression for more than a year and is still slated to lose -$5.3 billion", +) +print(text) diff --git a/Test.py b/dev/Test.py similarity index 100% rename from Test.py rename to dev/Test.py diff --git a/pyproject.toml b/pyproject.toml index 3b9e486..5e6bd9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "inflect>=7.5.0", "phonemizer-fork>=3.3.2", "av>=14.2.0", + "text2num>=2.5.1", ] [project.optional-dependencies] From 14bd6a8118dbace2af282a8b3798c8870fed5fb2 Mon Sep 17 00:00:00 2001 From: Richard Roberson Date: Fri, 21 Mar 2025 22:05:25 -0600 Subject: [PATCH 2/3] Fix Helm charts health check, ingress, and values --- charts/kokoro-fastapi/Chart.yaml | 28 ++---- charts/kokoro-fastapi/aks-tls-values.yaml | 54 ++++++++++ charts/kokoro-fastapi/templates/NOTES.txt | 6 +- charts/kokoro-fastapi/templates/ingress.yaml | 99 ++++++------------- .../templates/kokoro-tts-deployment.yaml | 8 +- .../templates/tests/test-connection.yaml | 2 +- charts/kokoro-fastapi/values.yaml | 67 +++++-------- 7 files changed, 127 insertions(+), 137 deletions(-) create mode 100644 charts/kokoro-fastapi/aks-tls-values.yaml diff --git 
a/charts/kokoro-fastapi/Chart.yaml b/charts/kokoro-fastapi/Chart.yaml index bd0cf5d..0f79d40 100644 --- a/charts/kokoro-fastapi/Chart.yaml +++ b/charts/kokoro-fastapi/Chart.yaml @@ -1,24 +1,12 @@ apiVersion: v2 name: kokoro-fastapi -description: A Helm chart for kokoro-fastapi - -# A chart can be either an 'application' or a 'library' chart. -# -# Application charts are a collection of templates that can be packaged into versioned archives -# to be deployed. -# -# Library charts provide useful utilities or functions for the chart developer. They're included as -# a dependency of application charts to inject those utilities and functions into the rendering -# pipeline. Library charts do not define any templates and therefore cannot be deployed. +description: A Helm chart for deploying the Kokoro FastAPI TTS service to Kubernetes type: application +version: 0.2.0 +appVersion: "0.2.0" -# This is the chart version. This version number should be incremented each time you make changes -# to the chart and its templates, including the app version. -# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 - -# This is the version number of the application being deployed. This version number should be -# incremented each time you make changes to the application. Versions are not expected to -# follow Semantic Versioning. They should reflect the version the application is using. -# It is recommended to use it with quotes. 
-appVersion: "1.16.0" +keywords: + - tts + - fastapi + - gpu + - kokoro diff --git a/charts/kokoro-fastapi/aks-tls-values.yaml b/charts/kokoro-fastapi/aks-tls-values.yaml new file mode 100644 index 0000000..2a6d13d --- /dev/null +++ b/charts/kokoro-fastapi/aks-tls-values.yaml @@ -0,0 +1,54 @@ +# Tested on +# - Azure AKS with GPU node pool with Nvidia GPU operator +# - This setup uses 1 ingress and load balances between 2 replicas, enabling simultaneous requests +# +# Azure CLI command to create a GPU node pool: +# az aks nodepool add \ +# --resource-group $AZ_RESOURCE_GROUP \ +# --cluster-name $CLUSTER_NAME \ +# --name t4gpus \ +# --node-vm-size Standard_NC4as_T4_v3 \ +# --node-count 2 \ +# --enable-cluster-autoscaler \ +# --min-count 1 \ +# --max-count 2 \ +# --priority Spot \ +# --eviction-policy Delete \ +# --spot-max-price -1 \ +# --node-taints "sku=gpu:NoSchedule,kubernetes.azure.com/scalesetpriority=spot:NoSchedule" \ +# --skip-gpu-driver-install + +kokoroTTS: + replicaCount: 2 + port: 8880 + tag: v0.2.0 + pullPolicy: IfNotPresent + +# Azure specific settings for spot t4 GPU nodes with Nvidia GPU operator +tolerations: + - key: "kubernetes.azure.com/scalesetpriority" + operator: Equal + value: "spot" + effect: NoSchedule + - key: "sku" + operator: Equal + value: "gpu" + effect: NoSchedule + +ingress: + enabled: true + className: "nginx" + annotations: + # Requires cert-manager and external-dns to be in the cluster for TLS and DNS + cert-manager.io/cluster-issuer: letsencrypt-prod + external-dns.alpha.kubernetes.io/hostname: your-external-dns-enabled-hostname + external-dns.alpha.kubernetes.io/cloudflare-proxied: "false" + hosts: + - host: your-external-dns-enabled-hostname + paths: + - path: / + pathType: Prefix + tls: + - secretName: kokoro-fastapi-tls + hosts: + - your-external-dns-enabled-hostname \ No newline at end of file diff --git a/charts/kokoro-fastapi/templates/NOTES.txt b/charts/kokoro-fastapi/templates/NOTES.txt index 88b8980..bc009b8 100644 --- 
a/charts/kokoro-fastapi/templates/NOTES.txt +++ b/charts/kokoro-fastapi/templates/NOTES.txt @@ -13,10 +13,10 @@ NOTE: It may take a few minutes for the LoadBalancer IP to be available. You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}' export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") - echo http://$SERVICE_IP:{{ .Values.service.port }} + echo http://$SERVICE_IP:{{ .Values.kokoroTTS.port }} {{- else if contains "ClusterIP" .Values.service.type }} export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") - echo "Visit http://127.0.0.1:8080 to use your application" - kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT + echo "Visit http://127.0.0.1:8880 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8880:$CONTAINER_PORT {{- end }} diff --git a/charts/kokoro-fastapi/templates/ingress.yaml b/charts/kokoro-fastapi/templates/ingress.yaml index 09a8fb5..a9c9f4e 100644 --- a/charts/kokoro-fastapi/templates/ingress.yaml +++ b/charts/kokoro-fastapi/templates/ingress.yaml @@ -1,82 +1,43 @@ {{- if .Values.ingress.enabled -}} -{{- $fullName := include "kokoro-fastapi.fullname" . 
-}} -{{- $svcPort := .Values.service.port -}} -{{- $rewriteTargets := (list) -}} -{{- with .Values.ingress.host }} - {{- range .endpoints }} - {{- $serviceName := default $fullName .serviceName -}} - {{- $rewrite := .rewrite | default "none" -}} - {{- if not (has $rewrite $rewriteTargets ) -}} - {{- $rewriteTargets = append $rewriteTargets $rewrite -}} - {{- end -}} - {{- end}} -{{- end }} -{{- range $key := $rewriteTargets }} -{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: -{{- if eq $key "none" }} - name: {{ $fullName }} -{{- else }} - name: {{ $fullName }}-{{ $expandedRewrite }} -{{- end }} + name: {{ include "kokoro-fastapi.fullname" . }} labels: - {{- include "kokoro-fastapi.labels" $ | nindent 4 }} - {{- if ne $key "none" }} + {{- include "kokoro-fastapi.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} annotations: - nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2 - {{- end }} -spec: -{{- if $.Values.ingress.tls }} - tls: - {{- range $.Values.ingress.tls }} - - hosts: - {{- range .hosts }} - - {{ . | quote }} - {{- end }} - secretName: {{ .secretName }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.ingress.className }} + ingressClassName: {{ . }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} {{- end }} -{{- end }} rules: - {{- with $.Values.ingress.host }} - - host: {{ .name | quote }} + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} http: paths: - {{- range .endpoints }} - {{- $serviceName := default $fullName .serviceName -}} - {{- $servicePort := default (print "http") .servicePort -}} - {{- if eq ( .rewrite | default "none" ) $key }} - {{- range .paths }} - {{- if not (contains "@" .) }} - {{- if eq $key "none" }} - - path: {{ . 
}} - {{- else }} - - path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*) - {{- end }} - pathType: Prefix - backend: - service: - name: "{{ $fullName }}-{{ $serviceName }}" - port: - number: {{ $servicePort }} - {{- else }} - {{- $path := . -}} - {{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}} - {{- range $count, $e := until ($replicaCount|int) }} - - path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*) - pathType: Prefix - backend: - service: - name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}" - port: - number: {{ $servicePort }} - {{- end }} - {{- end }} + {{- range .paths }} + - path: {{ .path }} + {{- with .pathType }} + pathType: {{ . }} {{- end }} + backend: + service: + name: {{ include "kokoro-fastapi.fullname" $ }}-kokoro-tts-service + port: + number: {{ $.Values.kokoroTTS.port }} {{- end }} - {{- end }} - {{- end }} ---- -{{- end }} + {{- end }} {{- end }} diff --git a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml index be1f67b..2178a08 100644 --- a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml +++ b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml @@ -20,7 +20,7 @@ spec: labels: {{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }} spec: - {{- with .Values.images.imagePullSecrets }} + {{- with .Values.kokoroTTS.imagePullSecrets }} imagePullSecrets: {{- toYaml . 
| nindent 8 }} {{- end }} @@ -49,10 +49,16 @@ spec: httpGet: path: /health port: kokoro-tts-http + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 readinessProbe: httpGet: path: /health port: kokoro-tts-http + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 resources: {{- toYaml .Values.kokoroTTS.resources | nindent 12 }} volumeMounts: [] diff --git a/charts/kokoro-fastapi/templates/tests/test-connection.yaml b/charts/kokoro-fastapi/templates/tests/test-connection.yaml index 120583f..8b912c6 100644 --- a/charts/kokoro-fastapi/templates/tests/test-connection.yaml +++ b/charts/kokoro-fastapi/templates/tests/test-connection.yaml @@ -11,5 +11,5 @@ spec: - name: wget image: busybox command: ['wget'] - args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}'] + args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.kokoroTTS.port }}'] restartPolicy: Never diff --git a/charts/kokoro-fastapi/values.yaml b/charts/kokoro-fastapi/values.yaml index 0db2f95..e2e37e4 100644 --- a/charts/kokoro-fastapi/values.yaml +++ b/charts/kokoro-fastapi/values.yaml @@ -1,12 +1,19 @@ # Default values for kokoro-fastapi. # This is a YAML-formatted file. # Declare variables to be passed into your templates. 
- -replicaCount: 1 - -images: - pullPolicy: "Always" - imagePullSecrets: [ ] +kokoroTTS: + replicaCount: 1 + # The name of the deployment repository + repository: "ghcr.io/remsky/kokoro-fastapi-gpu" + imagePullSecrets: [] # Set if using a private image or getting rate limited + tag: "latest" + pullPolicy: Always + port: 8880 + resources: + limits: + nvidia.com/gpu: 1 + requests: + nvidia.com/gpu: 1 nameOverride: "" fullnameOverride: "" @@ -38,47 +45,21 @@ service: ingress: enabled: false - className: "" + className: "nginx" annotations: {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - host: - name: kokoro.example.com - endpoints: - - paths: - - "/" - serviceName: "fastapi" - servicePort: 8880 + # cert-manager.io/cluster-issuer: letsencrypt-prod + # external-dns.alpha.kubernetes.io/hostname: kokoro.example.com + # external-dns.alpha.kubernetes.io/cloudflare-proxied: "false" + hosts: + - host: kokoro.example.com + paths: + - path: / + pathType: Prefix tls: [] - # - secretName: chart-example-tls + # - secretName: kokoro-fastapi-tls # hosts: - # - chart-example.local - -kokoroTTS: - repository: "ghcr.io/remsky/kokoro-fastapi-gpu" - tag: "latest" - pullPolicy: Always - serviceName: "fastapi" - port: 8880 - replicaCount: 1 - resources: - limits: - nvidia.com/gpu: 1 - requests: - nvidia.com/gpu: 1 - - - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
- # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi + # - kokoro.example.com autoscaling: enabled: false From 3c8e1b9143cf4629f57d40e2760068fd40442b24 Mon Sep 17 00:00:00 2001 From: Richard Roberson Date: Sat, 22 Mar 2025 05:54:58 -0600 Subject: [PATCH 3/3] Add example folder values files for Azure AKS and Nvidia GPU Operator --- .../{ => examples}/aks-tls-values.yaml | 2 +- .../examples/gpu-operator-values.yaml | 56 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) rename charts/kokoro-fastapi/{ => examples}/aks-tls-values.yaml (98%) create mode 100644 charts/kokoro-fastapi/examples/gpu-operator-values.yaml diff --git a/charts/kokoro-fastapi/aks-tls-values.yaml b/charts/kokoro-fastapi/examples/aks-tls-values.yaml similarity index 98% rename from charts/kokoro-fastapi/aks-tls-values.yaml rename to charts/kokoro-fastapi/examples/aks-tls-values.yaml index 2a6d13d..236af0a 100644 --- a/charts/kokoro-fastapi/aks-tls-values.yaml +++ b/charts/kokoro-fastapi/examples/aks-tls-values.yaml @@ -19,7 +19,7 @@ # --skip-gpu-driver-install kokoroTTS: - replicaCount: 2 + replicaCount: 8 port: 8880 tag: v0.2.0 pullPolicy: IfNotPresent diff --git a/charts/kokoro-fastapi/examples/gpu-operator-values.yaml b/charts/kokoro-fastapi/examples/gpu-operator-values.yaml new file mode 100644 index 0000000..b74667f --- /dev/null +++ b/charts/kokoro-fastapi/examples/gpu-operator-values.yaml @@ -0,0 +1,56 @@ +# Follow the official NVIDIA GPU Operator documentation +# to install the GPU operator with these settings: +# https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html +# +# This example is for an Nvidia T4 16 GB GPU node pool with only 1 GPU on each node on Azure AKS. +# It uses time-slicing to share the GPU and claim to the system that 1 GPU is 4 GPUs. +# So each pod has access to a smaller GPU with 4 GB of memory. 
+# +devicePlugin: # Remove this if you don't want to use time-slicing + config: + create: true + name: "time-slicing-config" + default: "any" + data: + any: |- + version: v1 + flags: + migStrategy: none + sharing: + timeSlicing: + resources: + - name: nvidia.com/gpu + replicas: 4 + +daemonsets: + tolerations: + - key: "sku" + operator: Equal + value: "gpu" + effect: NoSchedule + - key: "kubernetes.azure.com/scalesetpriority" + operator: Equal + value: "spot" + effect: NoSchedule + +node-feature-discovery: + master: + tolerations: + - key: "sku" + operator: Equal + value: "gpu" + effect: NoSchedule + - key: "kubernetes.azure.com/scalesetpriority" + operator: Equal + value: "spot" + effect: NoSchedule + worker: + tolerations: + - key: "sku" + operator: Equal + value: "gpu" + effect: NoSchedule + - key: "kubernetes.azure.com/scalesetpriority" + operator: Equal + value: "spot" + effect: NoSchedule \ No newline at end of file