From d712308f9866dd4886aa3f984422f8e6b43982b7 Mon Sep 17 00:00:00 2001
From: Fireblade2534 <Fireblade5234@gmail.com>
Date: Fri, 21 Mar 2025 18:03:09 +0000
Subject: [PATCH 1/3] Fixes relating to parsing money and tests. Also readme
 stuff

---
 README.md                                     | 31 +++++++++++-
 api/src/routers/openai_compatible.py          |  4 +-
 api/src/services/streaming_audio_writer.py    |  2 +-
 .../services/text_processing/normalizer.py    | 48 ++++++++++++-------
 .../text_processing/text_processor.py         |  1 +
 api/src/services/tts_service.py               |  2 +-
 api/tests/test_kokoro_v1.py                   | 19 ++++----
 api/tests/test_normalizer.py                  |  7 ++-
 api/tests/test_text_processor.py              | 15 +++++-
 Test Threads.py => dev/Test Threads.py        |  0
 Test copy.py => dev/Test copy.py              |  0
 dev/Test money.py                             | 26 ++++++++++
 dev/Test num.py                               | 45 +++++++++++++++++
 Test.py => dev/Test.py                        |  0
 pyproject.toml                                |  1 +
 15 files changed, 165 insertions(+), 36 deletions(-)
 rename Test Threads.py => dev/Test Threads.py (100%)
 rename Test copy.py => dev/Test copy.py (100%)
 create mode 100644 dev/Test money.py
 create mode 100644 dev/Test num.py
 rename Test.py => dev/Test.py (100%)
diff --git a/README.md b/README.md
index 6859163..5318b03 100644
--- a/README.md
+++ b/README.md
@@ -516,7 +516,36 @@ Monitor system state and resource usage with these endpoints:
 Useful for debugging resource exhaustion or performance issues.
 </details>
 
-## Known Issues
+## Known Issues & Troubleshooting
+
+<details>
+<summary>Missing words & Missing some timestamps</summary>
+
+The API automatically normalizes input text, which may incorrectly remove or change some phrases. This can be disabled by adding `"normalization_options":{"normalize": false}` to your request JSON:
+```python
+import requests
+
+response = requests.post(
+    "http://localhost:8880/v1/audio/speech",
+    json={
+        "input": "Hello world!",
+        "voice": "af_heart",
+        "response_format": "pcm",
+        "normalization_options":
+        {
+            "normalize": False
+        }
+    },
+    stream=True
+)
+
+for chunk in response.iter_content(chunk_size=1024):
+    if chunk:
+        # Process streaming chunks
+        pass
+```
+  
+</details>
 
 <details>
 <summary>Versioning & Development</summary>
diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py
index 742c216..1e89151 100644
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@@ -125,20 +125,18 @@ async def process_and_validate_voices(voice_input: Union[str, List[str]], tts_se
 async def stream_audio_chunks(tts_service: TTSService, request: Union[OpenAISpeechRequest, CaptionedSpeechRequest], client_request: Request, writer: StreamingAudioWriter) -> AsyncGenerator[AudioChunk, None]:
     """Stream audio chunks as they're generated with client disconnect handling"""
     voice_name = await process_and_validate_voices(request.voice, tts_service)
-
     unique_properties = {"return_timestamps": False}
     if hasattr(request, "return_timestamps"):
         unique_properties["return_timestamps"] = request.return_timestamps
 
     try:
-        logger.info(f"Starting audio generation with lang_code: {request.lang_code}")
         async for chunk_data in tts_service.generate_audio_stream(
             text=request.input,
             voice=voice_name,
             writer=writer,
             speed=request.speed,
             output_format=request.response_format,
-            lang_code=request.lang_code or settings.default_voice_code or voice_name[0].lower(),
+            lang_code=request.lang_code,
             normalization_options=request.normalization_options,
             return_timestamps=unique_properties["return_timestamps"],
         ):
diff --git a/api/src/services/streaming_audio_writer.py b/api/src/services/streaming_audio_writer.py
index 763c5eb..75d87b4 100644
--- a/api/src/services/streaming_audio_writer.py
+++ b/api/src/services/streaming_audio_writer.py
@@ -25,7 +25,7 @@ class StreamingAudioWriter:
         if self.format in ["wav","flac","mp3","pcm","aac","opus"]:
             if self.format != "pcm":
                 self.output_buffer = BytesIO()
-                self.container = av.open(self.output_buffer, mode="w", format=self.format)
+                self.container = av.open(self.output_buffer, mode="w", format=self.format if self.format != "aac" else "adts")
                 self.stream = self.container.add_stream(codec_map[self.format],sample_rate=self.sample_rate,layout='mono' if self.channels == 1 else 'stereo')
                 self.stream.bit_rate = 128000
         else:
diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py
index 84c3694..0acfe4d 100644
--- a/api/src/services/text_processing/normalizer.py
+++ b/api/src/services/text_processing/normalizer.py
@@ -8,9 +8,11 @@ import re
 from functools import lru_cache
 import inflect
 from numpy import number
-
+from torch import mul
 from ...structures.schemas import NormalizationOptions
 
+from text_to_num import text2num
+
 # Constants
 VALID_TLDS = [
     "com",
@@ -134,25 +136,35 @@ def handle_units(u: re.Match[str]) -> str:
         unit[0]=INFLECT_ENGINE.no(unit[0],number)
     return " ".join(unit)
 
+def conditional_int(number: float, threshold: float = 0.00001):
+    """Return ``number`` as an int when it lies within ``threshold`` of a whole value."""
+    nearest = round(number)
+    return int(nearest) if abs(nearest - number) < threshold else number
+
 def handle_money(m: re.Match[str]) -> str:
     """Convert money expressions to spoken form"""
-    m = m.group()
-    bill = "dollar" if m[0] == "$" else "pound"
-    if m[-1].isalpha():
-        return f"{INFLECT_ENGINE.number_to_words(m[1:])} {bill}s"
-    elif "." not in m:
-        s = "" if m[1:] == "1" else "s"
-        return f"{INFLECT_ENGINE.number_to_words(m[1:])} {bill}{s}"
-    b, c = m[1:].split(".")
-    s = "" if b == "1" else "s"
-    c = int(c.ljust(2, "0"))
-    coins = (
-        f"cent{'' if c == 1 else 's'}"
-        if m[0] == "$"
-        else ("penny" if c == 1 else "pence")
-    )
-    return f"{INFLECT_ENGINE.number_to_words(b)} {bill}{s} and {INFLECT_ENGINE.number_to_words(c)} {coins}"
 
+    # Groups: (1) optional "-", (2) "$" or "£", (3) digits, (4) scale words.
+    is_dollar = m.group(2) == "$"
+    bill = "dollar" if is_dollar else "pound"
+    multiplier = m.group(4)
+    try:
+        number = float(m.group(3))
+    except ValueError:
+        return m.group()  # Unparseable amount: leave the text unchanged.
+    sign = "minus " if m.group(1) == "-" and number else ""
+
+    if number % 1 == 0 or multiplier != "":
+        # Scaled or whole amounts are read as a plain number, e.g. "$5.3 thousand".
+        amount = conditional_int(number)
+        unit = bill if amount == 1 and multiplier == "" else f"{bill}s"
+        return f"{sign}{INFLECT_ENGINE.number_to_words(amount)}{multiplier} {unit}"
+
+    # Truncate instead of rounding so "$50.60" stays "fifty dollars and sixty cents".
+    whole = int(number)
+    cents = int(m.group(3).split(".")[1][:2].ljust(2, "0"))  # sub-cent digits dropped
+    coin = ("cent" if cents == 1 else "cents") if is_dollar else ("penny" if cents == 1 else "pence")
+    return f"{sign}{INFLECT_ENGINE.number_to_words(whole)} {bill if whole == 1 else bill + 's'} and {INFLECT_ENGINE.number_to_words(cents)} {coin}"
 
 def handle_decimal(num: re.Match[str]) -> str:
     """Convert decimal numbers to spoken form"""
@@ -297,7 +309,7 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
     text = re.sub(r"(?<=\d),(?=\d)", "", text)
     
     text = re.sub(
-        r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b",
+        r"(?i)(-?)([$£])(\d+(?:\.\d+)?)((?: hundred| thousand| (?:[bm]|tr|quadr)illion)*)\b",
         handle_money,
         text,
     )
diff --git a/api/src/services/text_processing/text_processor.py b/api/src/services/text_processing/text_processor.py
index 0d8d36c..0bd4658 100644
--- a/api/src/services/text_processing/text_processor.py
+++ b/api/src/services/text_processing/text_processor.py
@@ -134,6 +134,7 @@ async def smart_split(
 
     # Normalize text
     if settings.advanced_text_normalization and normalization_options.normalize:
+        print(lang_code)
         if lang_code in ["a","b","en-us","en-gb"]:
             text = CUSTOM_PHONEMES.sub(lambda s: handle_custom_phonemes(s, custom_phoneme_list), text)
             text=normalize_text(text,normalization_options)
diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py
index f740a29..8a6bb42 100644
--- a/api/src/services/tts_service.py
+++ b/api/src/services/tts_service.py
@@ -258,7 +258,7 @@ class TTSService:
             logger.info(f"Using lang_code '{pipeline_lang_code}' for voice '{voice_name}' in audio stream")
 
             # Process text in chunks with smart splitting
-            async for chunk_text, tokens in smart_split(text, lang_code=lang_code, normalization_options=normalization_options):
+            async for chunk_text, tokens in smart_split(text, lang_code=pipeline_lang_code, normalization_options=normalization_options):
                 try:
                     # Process audio for chunk
                     async for chunk_data in self._process_chunk(
diff --git a/api/tests/test_kokoro_v1.py b/api/tests/test_kokoro_v1.py
index 850ed05..29d83c5 100644
--- a/api/tests/test_kokoro_v1.py
+++ b/api/tests/test_kokoro_v1.py
@@ -23,19 +23,18 @@ def test_initial_state(kokoro_backend):
 
 
 @patch("torch.cuda.is_available", return_value=True)
-@patch("torch.cuda.memory_allocated")
+@patch("torch.cuda.memory_allocated", return_value=5e9)
 def test_memory_management(mock_memory, mock_cuda, kokoro_backend):
     """Test GPU memory management functions."""
-    # Mock GPU memory usage
-    mock_memory.return_value = 5e9  # 5GB
+    # Patch backend so it thinks we have cuda
+    with patch.object(kokoro_backend, "_device", "cuda"):
+        # Test memory check
+        with patch("api.src.inference.kokoro_v1.model_config") as mock_config:
+            mock_config.pytorch_gpu.memory_threshold = 4
+            assert kokoro_backend._check_memory() == True
 
-    # Test memory check
-    with patch("api.src.inference.kokoro_v1.model_config") as mock_config:
-        mock_config.pytorch_gpu.memory_threshold = 4
-        assert kokoro_backend._check_memory() == True
-
-        mock_config.pytorch_gpu.memory_threshold = 6
-        assert kokoro_backend._check_memory() == False
+            mock_config.pytorch_gpu.memory_threshold = 6
+            assert kokoro_backend._check_memory() == False
 
 
 @patch("torch.cuda.empty_cache")
diff --git a/api/tests/test_normalizer.py b/api/tests/test_normalizer.py
index 0aa963e..6dd4342 100644
--- a/api/tests/test_normalizer.py
+++ b/api/tests/test_normalizer.py
@@ -83,7 +83,12 @@ def test_url_email_addresses():
         == "Send to test dot user at site dot com"
     )
 
-
+def test_money():
+    """Test that money text is normalized correctly"""
+    assert normalize_text("He lost $5.3 thousand.",normalization_options=NormalizationOptions()) == "He lost five point three thousand dollars."
+    assert normalize_text("To put it weirdly -$6.9 million",normalization_options=NormalizationOptions()) == "To put it weirdly minus six point nine million dollars"
+    assert normalize_text("It costs $50.3.",normalization_options=NormalizationOptions()) == "It costs fifty dollars and thirty cents."
+ 
 def test_non_url_text():
     """Test that non-URL text is unaffected"""
     assert normalize_text("This is not.a.url text",normalization_options=NormalizationOptions()) == "This is not-a-url text"
diff --git a/api/tests/test_text_processor.py b/api/tests/test_text_processor.py
index 3d844b1..7e5fb0f 100644
--- a/api/tests/test_text_processor.py
+++ b/api/tests/test_text_processor.py
@@ -34,7 +34,7 @@ def test_process_text_chunk_phonemes():
 def test_get_sentence_info():
     """Test sentence splitting and info extraction."""
     text = "This is sentence one. This is sentence two! What about three?"
-    results = get_sentence_info(text)
+    results = get_sentence_info(text, {})
 
     assert len(results) == 3
     for sentence, tokens, count in results:
@@ -44,6 +44,19 @@ def test_get_sentence_info():
         assert count == len(tokens)
         assert count > 0
 
+def test_get_sentence_info_phenomoes():
+    """Test that a custom-phoneme placeholder is replaced by its phonemes when splitting sentences."""
+    text = "This is sentence one. This is </|custom_phonemes_0|/> two! What about three?"
+    results = get_sentence_info(text, {"</|custom_phonemes_0|/>": r"sˈɛntᵊns"})
+
+    assert len(results) == 3
+    assert "sˈɛntᵊns" in results[1][0]
+    for sentence, tokens, count in results:
+        assert isinstance(sentence, str)
+        assert isinstance(tokens, list)
+        assert isinstance(count, int)
+        assert count == len(tokens)
+        assert count > 0
 
 @pytest.mark.asyncio
 async def test_smart_split_short_text():
diff --git a/Test Threads.py b/dev/Test Threads.py
similarity index 100%
rename from Test Threads.py
rename to dev/Test Threads.py
diff --git a/Test copy.py b/dev/Test copy.py
similarity index 100%
rename from Test copy.py
rename to dev/Test copy.py
diff --git a/dev/Test money.py b/dev/Test money.py
new file mode 100644
index 0000000..4956070
--- /dev/null
+++ b/dev/Test money.py	
@@ -0,0 +1,26 @@
+import requests
+import base64
+import json
+
+text="""the administration has offered up a platter of repression for more than a year and is still slated to lose $400 million.
+
+Columbia is the largest private landowner in New York City and boasts an endowment of $14.8 billion;"""
+
+
+Type="wav"
+
+response = requests.post(
+    "http://localhost:8880/v1/audio/speech",
+    json={
+        "model": "kokoro",
+        "input": text,
+        "voice": "af_heart+af_sky",
+        "speed": 1.0,
+        "response_format": Type,
+        "stream": False,
+    },
+    stream=True
+)
+
+with open(f"outputnostreammoney.{Type}", "wb") as f:
+    f.write(response.content)
diff --git a/dev/Test num.py b/dev/Test num.py
new file mode 100644
index 0000000..15bd4d7
--- /dev/null
+++ b/dev/Test num.py	
@@ -0,0 +1,45 @@
+from text_to_num import text2num
+import re
+import inflect
+from torch import mul
+
+INFLECT_ENGINE = inflect.engine()
+
+
+def conditional_int(number: float, threshold: float = 0.00001):
+    """Collapse ``number`` to an int if it is within ``threshold`` of a whole value."""
+    rounded = round(number)
+    return int(rounded) if abs(rounded - number) < threshold else number
+
+def handle_money(m: re.Match[str]) -> str:
+    """Convert a money regex match (sign, symbol, digits, scale words) to spoken form."""
+
+    # Groups: (1) optional "-", (2) "$" or "£", (3) digits, (4) scale words.
+    is_dollar = m.group(2) == "$"
+    bill = "dollar" if is_dollar else "pound"
+    multiplier = m.group(4)
+    try:
+        number = float(m.group(3))
+    except ValueError:
+        return m.group()  # Unparseable amount: leave the text unchanged.
+    sign = "minus " if m.group(1) == "-" and number else ""
+
+    if number % 1 == 0 or multiplier != "":
+        # Scaled or whole amounts are read as a plain number, e.g. "$5.3 billion".
+        amount = conditional_int(number)
+        unit = bill if amount == 1 and multiplier == "" else f"{bill}s"
+        return f"{sign}{INFLECT_ENGINE.number_to_words(amount)}{multiplier} {unit}"
+
+    # Truncate instead of rounding so "$50.60" stays "fifty dollars and sixty cents".
+    whole = int(number)
+    cents = int(m.group(3).split(".")[1][:2].ljust(2, "0"))  # sub-cent digits dropped
+    coin = ("cent" if cents == 1 else "cents") if is_dollar else ("penny" if cents == 1 else "pence")
+    return f"{sign}{INFLECT_ENGINE.number_to_words(whole)} {bill if whole == 1 else bill + 's'} and {INFLECT_ENGINE.number_to_words(cents)} {coin}"
+
+
+text = re.sub(
+    r"(?i)(-?)([$£])(\d+(?:\.\d+)?)((?: hundred| thousand| (?:[bm]|tr|quadr)illion)*)\b",
+    handle_money,
+    "he administration has offered up a platter of repression for more than a year and is still slated to lose -$5.3 billion",
+)
+print(text)
diff --git a/Test.py b/dev/Test.py
similarity index 100%
rename from Test.py
rename to dev/Test.py
diff --git a/pyproject.toml b/pyproject.toml
index 3b9e486..5e6bd9c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,7 @@ dependencies = [
     "inflect>=7.5.0",
     "phonemizer-fork>=3.3.2",
     "av>=14.2.0",
+    "text2num>=2.5.1",
 ]
 
 [project.optional-dependencies]

From 14bd6a8118dbace2af282a8b3798c8870fed5fb2 Mon Sep 17 00:00:00 2001
From: Richard Roberson <richardr1126@gmail.com>
Date: Fri, 21 Mar 2025 22:05:25 -0600
Subject: [PATCH 2/3] Fix Helm charts health check, ingress, and values

---
 charts/kokoro-fastapi/Chart.yaml              | 28 ++----
 charts/kokoro-fastapi/aks-tls-values.yaml     | 54 ++++++++++
 charts/kokoro-fastapi/templates/NOTES.txt     |  6 +-
 charts/kokoro-fastapi/templates/ingress.yaml  | 99 ++++++-------------
 .../templates/kokoro-tts-deployment.yaml      |  8 +-
 .../templates/tests/test-connection.yaml      |  2 +-
 charts/kokoro-fastapi/values.yaml             | 67 +++++--------
 7 files changed, 127 insertions(+), 137 deletions(-)
 create mode 100644 charts/kokoro-fastapi/aks-tls-values.yaml

diff --git a/charts/kokoro-fastapi/Chart.yaml b/charts/kokoro-fastapi/Chart.yaml
index bd0cf5d..0f79d40 100644
--- a/charts/kokoro-fastapi/Chart.yaml
+++ b/charts/kokoro-fastapi/Chart.yaml
@@ -1,24 +1,12 @@
 apiVersion: v2
 name: kokoro-fastapi
-description: A Helm chart for kokoro-fastapi
-
-# A chart can be either an 'application' or a 'library' chart.
-#
-# Application charts are a collection of templates that can be packaged into versioned archives
-# to be deployed.
-#
-# Library charts provide useful utilities or functions for the chart developer. They're included as
-# a dependency of application charts to inject those utilities and functions into the rendering
-# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+description: A Helm chart for deploying the Kokoro FastAPI TTS service to Kubernetes
 type: application
+version: 0.2.0
+appVersion: "0.2.0"
 
-# This is the chart version. This version number should be incremented each time you make changes
-# to the chart and its templates, including the app version.
-# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
-
-# This is the version number of the application being deployed. This version number should be
-# incremented each time you make changes to the application. Versions are not expected to
-# follow Semantic Versioning. They should reflect the version the application is using.
-# It is recommended to use it with quotes.
-appVersion: "1.16.0"
+keywords:
+  - tts
+  - fastapi
+  - gpu
+  - kokoro
diff --git a/charts/kokoro-fastapi/aks-tls-values.yaml b/charts/kokoro-fastapi/aks-tls-values.yaml
new file mode 100644
index 0000000..2a6d13d
--- /dev/null
+++ b/charts/kokoro-fastapi/aks-tls-values.yaml
@@ -0,0 +1,54 @@
+# Tested on
+# - Azure AKS with GPU node pool with Nvidia GPU operator
+# - This setup uses 1 ingress and load balances between 2 replicas, enabling simultaneous requests
+# 
+# Azure CLI command to create a GPU node pool:
+# az aks nodepool add \
+#   --resource-group $AZ_RESOURCE_GROUP \
+#   --cluster-name $CLUSTER_NAME \
+#   --name t4gpus \
+#   --node-vm-size Standard_NC4as_T4_v3 \
+#   --node-count 2 \
+#   --enable-cluster-autoscaler \
+#   --min-count 1 \
+#   --max-count 2 \
+#   --priority Spot \
+#   --eviction-policy Delete \
+#   --spot-max-price -1 \
+#   --node-taints "sku=gpu:NoSchedule,kubernetes.azure.com/scalesetpriority=spot:NoSchedule" \
+#   --skip-gpu-driver-install
+
+kokoroTTS:
+  replicaCount: 2
+  port: 8880
+  tag: v0.2.0
+  pullPolicy: IfNotPresent
+
+# Azure specific settings for spot t4 GPU nodes with Nvidia GPU operator
+tolerations:
+  - key: "kubernetes.azure.com/scalesetpriority"
+    operator: Equal
+    value: "spot"
+    effect: NoSchedule
+  - key: "sku"
+    operator: Equal
+    value: "gpu"
+    effect: NoSchedule
+
+ingress:
+  enabled: true
+  className: "nginx"
+  annotations:
+    # Requires cert-manager and external-dns to be in the cluster for TLS and DNS
+    cert-manager.io/cluster-issuer: letsencrypt-prod
+    external-dns.alpha.kubernetes.io/hostname: your-external-dns-enabled-hostname
+    external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
+  hosts:
+    - host: your-external-dns-enabled-hostname
+      paths:
+        - path: /
+          pathType: Prefix
+  tls:
+    - secretName: kokoro-fastapi-tls
+      hosts:
+        - your-external-dns-enabled-hostname
\ No newline at end of file
diff --git a/charts/kokoro-fastapi/templates/NOTES.txt b/charts/kokoro-fastapi/templates/NOTES.txt
index 88b8980..bc009b8 100644
--- a/charts/kokoro-fastapi/templates/NOTES.txt
+++ b/charts/kokoro-fastapi/templates/NOTES.txt
@@ -13,10 +13,10 @@
      NOTE: It may take a few minutes for the LoadBalancer IP to be available.
            You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}'
   export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
-  echo http://$SERVICE_IP:{{ .Values.service.port }}
+  echo http://$SERVICE_IP:{{ .Values.kokoroTTS.port }}
 {{- else if contains "ClusterIP" .Values.service.type }}
   export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
   export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
-  echo "Visit http://127.0.0.1:8080 to use your application"
-  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+  echo "Visit http://127.0.0.1:8880 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8880:$CONTAINER_PORT
 {{- end }}
diff --git a/charts/kokoro-fastapi/templates/ingress.yaml b/charts/kokoro-fastapi/templates/ingress.yaml
index 09a8fb5..a9c9f4e 100644
--- a/charts/kokoro-fastapi/templates/ingress.yaml
+++ b/charts/kokoro-fastapi/templates/ingress.yaml
@@ -1,82 +1,43 @@
 {{- if .Values.ingress.enabled -}}
-{{- $fullName := include "kokoro-fastapi.fullname" . -}}
-{{- $svcPort := .Values.service.port -}}
-{{- $rewriteTargets := (list) -}}
-{{- with .Values.ingress.host }}
-  {{- range .endpoints }}
-    {{- $serviceName := default $fullName .serviceName -}}
-    {{- $rewrite := .rewrite | default "none" -}}
-    {{- if not (has $rewrite $rewriteTargets  ) -}}
-    {{- $rewriteTargets = append $rewriteTargets $rewrite -}}
-    {{- end -}}
-  {{- end}}
-{{- end }}
-{{- range $key := $rewriteTargets }}
-{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}}
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
-{{- if eq $key "none" }}
-  name: {{ $fullName }}
-{{- else }}
-  name: {{ $fullName }}-{{ $expandedRewrite }}
-{{- end }}
+  name: {{ include "kokoro-fastapi.fullname" . }}
   labels:
-    {{- include "kokoro-fastapi.labels" $ | nindent 4 }}
-    {{- if ne $key "none" }}
+    {{- include "kokoro-fastapi.labels" . | nindent 4 }}
+  {{- with .Values.ingress.annotations }}
   annotations:
-    nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2
-    {{- end }}
-spec:
-{{- if $.Values.ingress.tls }}
-  tls:
-  {{- range $.Values.ingress.tls }}
-    - hosts:
-      {{- range .hosts }}
-        - {{ . | quote }}
-      {{- end }}
-      secretName: {{ .secretName }}
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- if .Values.ingress.tls }}
+  tls:
+    {{- range .Values.ingress.tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
   {{- end }}
-{{- end }}
   rules:
-  {{- with $.Values.ingress.host }}
-    - host: {{ .name | quote }}
+    {{- range .Values.ingress.hosts }}
+    - host: {{ .host | quote }}
       http:
         paths:
-        {{- range .endpoints }}
-          {{- $serviceName := default $fullName .serviceName -}}
-          {{- $servicePort := default (print "http") .servicePort -}}
-          {{- if eq ( .rewrite | default "none" ) $key }}
-            {{- range .paths }}
-              {{- if not (contains "@" .) }}
-                {{- if eq $key "none" }}
-            - path: {{ . }}
-                {{- else }}
-            - path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*)
-                {{- end }}
-              pathType: Prefix
-              backend:
-                service:
-                  name: "{{ $fullName }}-{{ $serviceName }}"
-                  port:
-                    number: {{ $servicePort }}
-              {{- else }}
-                {{- $path := . -}}
-                {{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}}
-                {{- range $count, $e := until ($replicaCount|int) }}
-            - path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*)
-              pathType: Prefix
-              backend:
-                service:
-                  name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}"
-                  port:
-                    number: {{ $servicePort }}
-                {{- end }}
-              {{- end }}
+          {{- range .paths }}
+          - path: {{ .path }}
+            {{- with .pathType }}
+            pathType: {{ . }}
             {{- end }}
+            backend:
+              service:
+                name: {{ include "kokoro-fastapi.fullname" $ }}-kokoro-tts-service
+                port:
+                  number: {{ $.Values.kokoroTTS.port }}
           {{- end }}
-        {{- end }}
-  {{- end }}
----
-{{- end }}
+    {{- end }}
 {{- end }}
diff --git a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
index be1f67b..2178a08 100644
--- a/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
+++ b/charts/kokoro-fastapi/templates/kokoro-tts-deployment.yaml
@@ -20,7 +20,7 @@ spec:
       labels:
         {{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }}
     spec:
-      {{- with .Values.images.imagePullSecrets }}
+      {{- with .Values.kokoroTTS.imagePullSecrets }}
       imagePullSecrets:
         {{- toYaml . | nindent 8 }}
       {{- end }}
@@ -49,10 +49,16 @@ spec:
             httpGet:
               path: /health
               port: kokoro-tts-http
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 5
           readinessProbe:
             httpGet:
               path: /health
               port: kokoro-tts-http
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 5
           resources:
             {{- toYaml .Values.kokoroTTS.resources | nindent 12 }}
           volumeMounts: []
diff --git a/charts/kokoro-fastapi/templates/tests/test-connection.yaml b/charts/kokoro-fastapi/templates/tests/test-connection.yaml
index 120583f..8b912c6 100644
--- a/charts/kokoro-fastapi/templates/tests/test-connection.yaml
+++ b/charts/kokoro-fastapi/templates/tests/test-connection.yaml
@@ -11,5 +11,5 @@ spec:
     - name: wget
       image: busybox
       command: ['wget']
-      args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}']
+      args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.kokoroTTS.port }}']
   restartPolicy: Never
diff --git a/charts/kokoro-fastapi/values.yaml b/charts/kokoro-fastapi/values.yaml
index 0db2f95..e2e37e4 100644
--- a/charts/kokoro-fastapi/values.yaml
+++ b/charts/kokoro-fastapi/values.yaml
@@ -1,12 +1,19 @@
 # Default values for kokoro-fastapi.
 # This is a YAML-formatted file.
 # Declare variables to be passed into your templates.
-
-replicaCount: 1
-
-images:
-  pullPolicy: "Always"
-  imagePullSecrets: [ ]
+kokoroTTS:
+  replicaCount: 1
+  # The name of the deployment repository
+  repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
+  imagePullSecrets: [] # Set if using a private image or getting rate limited
+  tag: "latest"
+  pullPolicy: Always
+  port: 8880
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+    requests:
+      nvidia.com/gpu: 1
 
 nameOverride: ""
 fullnameOverride: ""
@@ -38,47 +45,21 @@ service:
 
 ingress:
   enabled: false
-  className: ""
+  className: "nginx"
   annotations: {}
-    # kubernetes.io/ingress.class: nginx
-    # kubernetes.io/tls-acme: "true"
-  host:
-    name: kokoro.example.com
-    endpoints:
-      - paths:
-          - "/"
-        serviceName: "fastapi"
-        servicePort: 8880
+    # cert-manager.io/cluster-issuer: letsencrypt-prod
+    # external-dns.alpha.kubernetes.io/hostname: kokoro.example.com
+    # external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
+  hosts:
+    - host: kokoro.example.com
+      paths:
+        - path: /
+          pathType: Prefix
 
   tls: []
-  #  - secretName: chart-example-tls
+  #  - secretName: kokoro-fastapi-tls
   #    hosts:
-  #      - chart-example.local
-
-kokoroTTS:
-  repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
-  tag: "latest"
-  pullPolicy: Always
-  serviceName: "fastapi"
-  port: 8880
-  replicaCount: 1
-  resources:
-    limits:
-      nvidia.com/gpu: 1
-    requests:
-      nvidia.com/gpu: 1
-
-
-  # We usually recommend not to specify default resources and to leave this as a conscious
-  # choice for the user. This also increases chances charts run on environments with little
-  # resources, such as Minikube. If you do want to specify resources, uncomment the following
-  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
-  # limits:
-  #   cpu: 100m
-  #   memory: 128Mi
-  # requests:
-  #   cpu: 100m
-  #   memory: 128Mi
+  #      - kokoro.example.com
 
 autoscaling:
   enabled: false

From 3c8e1b9143cf4629f57d40e2760068fd40442b24 Mon Sep 17 00:00:00 2001
From: Richard Roberson <richardr1126@gmail.com>
Date: Sat, 22 Mar 2025 05:54:58 -0600
Subject: [PATCH 3/3] Add example folder values files for Azure AKS and Nvidia
 GPU Operator

---
 .../{ => examples}/aks-tls-values.yaml        |  2 +-
 .../examples/gpu-operator-values.yaml         | 56 +++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)
 rename charts/kokoro-fastapi/{ => examples}/aks-tls-values.yaml (98%)
 create mode 100644 charts/kokoro-fastapi/examples/gpu-operator-values.yaml

diff --git a/charts/kokoro-fastapi/aks-tls-values.yaml b/charts/kokoro-fastapi/examples/aks-tls-values.yaml
similarity index 98%
rename from charts/kokoro-fastapi/aks-tls-values.yaml
rename to charts/kokoro-fastapi/examples/aks-tls-values.yaml
index 2a6d13d..236af0a 100644
--- a/charts/kokoro-fastapi/aks-tls-values.yaml
+++ b/charts/kokoro-fastapi/examples/aks-tls-values.yaml
@@ -19,7 +19,7 @@
 #   --skip-gpu-driver-install
 
 kokoroTTS:
-  replicaCount: 2
+  replicaCount: 8
   port: 8880
   tag: v0.2.0
   pullPolicy: IfNotPresent
diff --git a/charts/kokoro-fastapi/examples/gpu-operator-values.yaml b/charts/kokoro-fastapi/examples/gpu-operator-values.yaml
new file mode 100644
index 0000000..b74667f
--- /dev/null
+++ b/charts/kokoro-fastapi/examples/gpu-operator-values.yaml
@@ -0,0 +1,56 @@
+# Follow the official NVIDIA GPU Operator documentation
+#   to install the GPU operator with these settings:
+#     https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html
+#
+# This example is for a Nvidia T4 16gb GPU node pool with only 1 GPU on each node on Azure AKS.
+# It uses time-slicing to share the GPU and claim to the system that 1 GPU is 4 GPUs.
+# So each pod has access to a smaller gpu with 4gb of memory.
+#
+devicePlugin: # Remove this if you dont want to use time-slicing
+  config:
+    create: true
+    name: "time-slicing-config"
+    default: "any"
+    data:
+      any: |-
+        version: v1
+        flags:
+          migStrategy: none
+        sharing:
+          timeSlicing:
+            resources:
+            - name: nvidia.com/gpu
+              replicas: 4
+
+daemonsets:
+  tolerations:
+    - key: "sku"
+      operator: Equal
+      value: "gpu"
+      effect: NoSchedule
+    - key: "kubernetes.azure.com/scalesetpriority"
+      operator: Equal
+      value: "spot"
+      effect: NoSchedule
+
+node-feature-discovery:
+  master:
+    tolerations:
+      - key: "sku"
+        operator: Equal
+        value: "gpu"
+        effect: NoSchedule
+      - key: "kubernetes.azure.com/scalesetpriority"
+        operator: Equal
+        value: "spot"
+        effect: NoSchedule
+  worker:
+    tolerations:
+      - key: "sku"
+        operator: Equal
+        value: "gpu"
+        effect: NoSchedule
+      - key: "kubernetes.azure.com/scalesetpriority"
+        operator: Equal
+        value: "spot"
+        effect: NoSchedule
\ No newline at end of file