Merge branch 'remsky:master' into master

This commit is contained in:
Kishor Prins 2025-03-27 13:10:12 -07:00 committed by GitHub
commit 2185b1b17d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 348 additions and 173 deletions

View file

@ -516,7 +516,36 @@ Monitor system state and resource usage with these endpoints:
Useful for debugging resource exhaustion or performance issues. Useful for debugging resource exhaustion or performance issues.
</details> </details>
## Known Issues ## Known Issues & Troubleshooting
<details>
<summary>Missing words & Missing some timestamps</summary>
The API will automatically do text normalization on input text, which may incorrectly remove or change some phrases. This can be disabled by adding `"normalization_options":{"normalize": false}` to your request JSON:
```python
import requests
response = requests.post(
"http://localhost:8880/v1/audio/speech",
json={
"input": "Hello world!",
"voice": "af_heart",
"response_format": "pcm",
"normalization_options":
{
"normalize": False
}
},
stream=True
)
for chunk in response.iter_content(chunk_size=1024):
if chunk:
# Process streaming chunks
pass
```
</details>
<details> <details>
<summary>Versioning & Development</summary> <summary>Versioning & Development</summary>

View file

@ -125,20 +125,18 @@ async def process_and_validate_voices(voice_input: Union[str, List[str]], tts_se
async def stream_audio_chunks(tts_service: TTSService, request: Union[OpenAISpeechRequest, CaptionedSpeechRequest], client_request: Request, writer: StreamingAudioWriter) -> AsyncGenerator[AudioChunk, None]: async def stream_audio_chunks(tts_service: TTSService, request: Union[OpenAISpeechRequest, CaptionedSpeechRequest], client_request: Request, writer: StreamingAudioWriter) -> AsyncGenerator[AudioChunk, None]:
"""Stream audio chunks as they're generated with client disconnect handling""" """Stream audio chunks as they're generated with client disconnect handling"""
voice_name = await process_and_validate_voices(request.voice, tts_service) voice_name = await process_and_validate_voices(request.voice, tts_service)
unique_properties = {"return_timestamps": False} unique_properties = {"return_timestamps": False}
if hasattr(request, "return_timestamps"): if hasattr(request, "return_timestamps"):
unique_properties["return_timestamps"] = request.return_timestamps unique_properties["return_timestamps"] = request.return_timestamps
try: try:
logger.info(f"Starting audio generation with lang_code: {request.lang_code}")
async for chunk_data in tts_service.generate_audio_stream( async for chunk_data in tts_service.generate_audio_stream(
text=request.input, text=request.input,
voice=voice_name, voice=voice_name,
writer=writer, writer=writer,
speed=request.speed, speed=request.speed,
output_format=request.response_format, output_format=request.response_format,
lang_code=request.lang_code or settings.default_voice_code or voice_name[0].lower(), lang_code=request.lang_code,
normalization_options=request.normalization_options, normalization_options=request.normalization_options,
return_timestamps=unique_properties["return_timestamps"], return_timestamps=unique_properties["return_timestamps"],
): ):

View file

@ -25,7 +25,7 @@ class StreamingAudioWriter:
if self.format in ["wav","flac","mp3","pcm","aac","opus"]: if self.format in ["wav","flac","mp3","pcm","aac","opus"]:
if self.format != "pcm": if self.format != "pcm":
self.output_buffer = BytesIO() self.output_buffer = BytesIO()
self.container = av.open(self.output_buffer, mode="w", format=self.format) self.container = av.open(self.output_buffer, mode="w", format=self.format if self.format != "aac" else "adts")
self.stream = self.container.add_stream(codec_map[self.format],sample_rate=self.sample_rate,layout='mono' if self.channels == 1 else 'stereo') self.stream = self.container.add_stream(codec_map[self.format],sample_rate=self.sample_rate,layout='mono' if self.channels == 1 else 'stereo')
self.stream.bit_rate = 128000 self.stream.bit_rate = 128000
else: else:

View file

@ -8,9 +8,11 @@ import re
from functools import lru_cache from functools import lru_cache
import inflect import inflect
from numpy import number from numpy import number
from torch import mul
from ...structures.schemas import NormalizationOptions from ...structures.schemas import NormalizationOptions
from text_to_num import text2num
# Constants # Constants
VALID_TLDS = [ VALID_TLDS = [
"com", "com",
@ -134,25 +136,35 @@ def handle_units(u: re.Match[str]) -> str:
unit[0]=INFLECT_ENGINE.no(unit[0],number) unit[0]=INFLECT_ENGINE.no(unit[0],number)
return " ".join(unit) return " ".join(unit)
def conditional_int(number: float, threshold: float = 0.00001):
    """Collapse *number* to an int when it is within *threshold* of a whole number.

    Lets near-integer float amounts (e.g. 5.0) be spoken as "five" instead of
    "five point zero". Non-whole values are returned unchanged (still float).
    """
    if abs(round(number) - number) < threshold:
        return int(round(number))
    return number
def handle_money(m: re.Match[str]) -> str: def handle_money(m: re.Match[str]) -> str:
"""Convert money expressions to spoken form""" """Convert money expressions to spoken form"""
m = m.group()
bill = "dollar" if m[0] == "$" else "pound"
if m[-1].isalpha():
return f"{INFLECT_ENGINE.number_to_words(m[1:])} {bill}s"
elif "." not in m:
s = "" if m[1:] == "1" else "s"
return f"{INFLECT_ENGINE.number_to_words(m[1:])} {bill}{s}"
b, c = m[1:].split(".")
s = "" if b == "1" else "s"
c = int(c.ljust(2, "0"))
coins = (
f"cent{'' if c == 1 else 's'}"
if m[0] == "$"
else ("penny" if c == 1 else "pence")
)
return f"{INFLECT_ENGINE.number_to_words(b)} {bill}{s} and {INFLECT_ENGINE.number_to_words(c)} {coins}"
bill = "dollar" if m.group(2) == "$" else "pound"
coin = "cent" if m.group(2) == "$" else "pence"
number = m.group(3)
multiplier = m.group(4)
try:
number = float(number)
except:
return m.group()
if m.group(1) == "-":
number *= -1
if number % 1 == 0 or multiplier != "":
text_number = f"{INFLECT_ENGINE.number_to_words(conditional_int(number))}{multiplier} {INFLECT_ENGINE.plural(bill, count=number)}"
else:
sub_number = int(str(number).split(".")[-1].ljust(2, "0"))
text_number = f"{INFLECT_ENGINE.number_to_words(int(round(number)))} {INFLECT_ENGINE.plural(bill, count=number)} and {INFLECT_ENGINE.number_to_words(sub_number)} {INFLECT_ENGINE.plural(coin, count=sub_number)}"
return text_number
def handle_decimal(num: re.Match[str]) -> str: def handle_decimal(num: re.Match[str]) -> str:
"""Convert decimal numbers to spoken form""" """Convert decimal numbers to spoken form"""
@ -297,7 +309,7 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
text = re.sub(r"(?<=\d),(?=\d)", "", text) text = re.sub(r"(?<=\d),(?=\d)", "", text)
text = re.sub( text = re.sub(
r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b", r"(?i)(-?)([$£])(\d+(?:\.\d+)?)((?: hundred| thousand| (?:[bm]|tr|quadr)illion)*)\b",
handle_money, handle_money,
text, text,
) )

View file

@ -134,6 +134,7 @@ async def smart_split(
# Normalize text # Normalize text
if settings.advanced_text_normalization and normalization_options.normalize: if settings.advanced_text_normalization and normalization_options.normalize:
print(lang_code)
if lang_code in ["a","b","en-us","en-gb"]: if lang_code in ["a","b","en-us","en-gb"]:
text = CUSTOM_PHONEMES.sub(lambda s: handle_custom_phonemes(s, custom_phoneme_list), text) text = CUSTOM_PHONEMES.sub(lambda s: handle_custom_phonemes(s, custom_phoneme_list), text)
text=normalize_text(text,normalization_options) text=normalize_text(text,normalization_options)

View file

@ -258,7 +258,7 @@ class TTSService:
logger.info(f"Using lang_code '{pipeline_lang_code}' for voice '{voice_name}' in audio stream") logger.info(f"Using lang_code '{pipeline_lang_code}' for voice '{voice_name}' in audio stream")
# Process text in chunks with smart splitting # Process text in chunks with smart splitting
async for chunk_text, tokens in smart_split(text, lang_code=lang_code, normalization_options=normalization_options): async for chunk_text, tokens in smart_split(text, lang_code=pipeline_lang_code, normalization_options=normalization_options):
try: try:
# Process audio for chunk # Process audio for chunk
async for chunk_data in self._process_chunk( async for chunk_data in self._process_chunk(

View file

@ -23,19 +23,18 @@ def test_initial_state(kokoro_backend):
@patch("torch.cuda.is_available", return_value=True) @patch("torch.cuda.is_available", return_value=True)
@patch("torch.cuda.memory_allocated") @patch("torch.cuda.memory_allocated", return_value=5e9)
def test_memory_management(mock_memory, mock_cuda, kokoro_backend): def test_memory_management(mock_memory, mock_cuda, kokoro_backend):
"""Test GPU memory management functions.""" """Test GPU memory management functions."""
# Mock GPU memory usage # Patch backend so it thinks we have cuda
mock_memory.return_value = 5e9 # 5GB with patch.object(kokoro_backend, "_device", "cuda"):
# Test memory check
with patch("api.src.inference.kokoro_v1.model_config") as mock_config:
mock_config.pytorch_gpu.memory_threshold = 4
assert kokoro_backend._check_memory() == True
# Test memory check mock_config.pytorch_gpu.memory_threshold = 6
with patch("api.src.inference.kokoro_v1.model_config") as mock_config: assert kokoro_backend._check_memory() == False
mock_config.pytorch_gpu.memory_threshold = 4
assert kokoro_backend._check_memory() == True
mock_config.pytorch_gpu.memory_threshold = 6
assert kokoro_backend._check_memory() == False
@patch("torch.cuda.empty_cache") @patch("torch.cuda.empty_cache")

View file

@ -83,6 +83,11 @@ def test_url_email_addresses():
== "Send to test dot user at site dot com" == "Send to test dot user at site dot com"
) )
def test_money():
    """Test that money text is normalized correctly"""
    # Amount with a spelled-out multiplier: multiplier word is kept in the output.
    assert normalize_text("He lost $5.3 thousand.",normalization_options=NormalizationOptions()) == "He lost five point three thousand dollars."
    # Leading minus sign is spoken as "minus".
    assert normalize_text("To put it weirdly -$6.9 million",normalization_options=NormalizationOptions()) == "To put it weirdly minus six point nine million dollars"
    # Fractional amount with no multiplier is split into dollars and cents.
    assert normalize_text("It costs $50.3.",normalization_options=NormalizationOptions()) == "It costs fifty dollars and thirty cents."
def test_non_url_text(): def test_non_url_text():
"""Test that non-URL text is unaffected""" """Test that non-URL text is unaffected"""

View file

@ -34,7 +34,7 @@ def test_process_text_chunk_phonemes():
def test_get_sentence_info(): def test_get_sentence_info():
"""Test sentence splitting and info extraction.""" """Test sentence splitting and info extraction."""
text = "This is sentence one. This is sentence two! What about three?" text = "This is sentence one. This is sentence two! What about three?"
results = get_sentence_info(text) results = get_sentence_info(text, {})
assert len(results) == 3 assert len(results) == 3
for sentence, tokens, count in results: for sentence, tokens, count in results:
@ -44,6 +44,19 @@ def test_get_sentence_info():
assert count == len(tokens) assert count == len(tokens)
assert count > 0 assert count > 0
def test_get_sentence_info_phenomoes():
    """Test sentence splitting and info extraction with a custom-phoneme marker.

    NOTE(review): "phenomoes" in the function name is a typo for "phonemes";
    renaming is left for a separate change since pytest discovers tests by name.
    """
    text = "This is sentence one. This is </|custom_phonemes_0|/> two! What about three?"
    # The marker in the second sentence should be replaced by its phoneme string.
    results = get_sentence_info(text, {"</|custom_phonemes_0|/>": r"sˈɛntᵊns"})

    assert len(results) == 3
    assert "sˈɛntᵊns" in results[1][0]
    for sentence, tokens, count in results:
        assert isinstance(sentence, str)
        assert isinstance(tokens, list)
        assert isinstance(count, int)
        assert count == len(tokens)
        assert count > 0
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_smart_split_short_text(): async def test_smart_split_short_text():

View file

@ -1,24 +1,12 @@
apiVersion: v2 apiVersion: v2
name: kokoro-fastapi name: kokoro-fastapi
description: A Helm chart for kokoro-fastapi description: A Helm chart for deploying the Kokoro FastAPI TTS service to Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application type: application
version: 0.2.0
appVersion: "0.2.0"
# This is the chart version. This version number should be incremented each time you make changes keywords:
# to the chart and its templates, including the app version. - tts
# Versions are expected to follow Semantic Versioning (https://semver.org/) - fastapi
version: 0.1.0 - gpu
- kokoro
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"

View file

@ -0,0 +1,54 @@
# Tested on
# - Azure AKS with GPU node pool with Nvidia GPU operator
# - This setup uses 1 ingress and load balances between 2 replicas, enabling simultaneous requests
#
# Azure CLI command to create a GPU node pool:
# az aks nodepool add \
# --resource-group $AZ_RESOURCE_GROUP \
# --cluster-name $CLUSTER_NAME \
# --name t4gpus \
# --node-vm-size Standard_NC4as_T4_v3 \
# --node-count 2 \
# --enable-cluster-autoscaler \
# --min-count 1 \
# --max-count 2 \
# --priority Spot \
# --eviction-policy Delete \
# --spot-max-price -1 \
# --node-taints "sku=gpu:NoSchedule,kubernetes.azure.com/scalesetpriority=spot:NoSchedule" \
# --skip-gpu-driver-install
kokoroTTS:
replicaCount: 8
port: 8880
tag: v0.2.0
pullPolicy: IfNotPresent
# Azure specific settings for spot t4 GPU nodes with Nvidia GPU operator
tolerations:
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
ingress:
enabled: true
className: "nginx"
annotations:
# Requires cert-manager and external-dns to be in the cluster for TLS and DNS
cert-manager.io/cluster-issuer: letsencrypt-prod
external-dns.alpha.kubernetes.io/hostname: your-external-dns-enabled-hostname
external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
hosts:
- host: your-external-dns-enabled-hostname
paths:
- path: /
pathType: Prefix
tls:
- secretName: kokoro-fastapi-tls
hosts:
- your-external-dns-enabled-hostname

View file

@ -0,0 +1,56 @@
# Follow the official NVIDIA GPU Operator documentation
# to install the GPU operator with these settings:
# https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html
#
# This example is for a Nvidia T4 16gb GPU node pool with only 1 GPU on each node on Azure AKS.
# It uses time-slicing to share the GPU and claims to the system that 1 GPU is 4 GPUs.
# So each pod has access to a smaller gpu with 4gb of memory.
#
devicePlugin: # Remove this if you don't want to use time-slicing
config:
create: true
name: "time-slicing-config"
default: "any"
data:
any: |-
version: v1
flags:
migStrategy: none
sharing:
timeSlicing:
resources:
- name: nvidia.com/gpu
replicas: 4
daemonsets:
tolerations:
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule
node-feature-discovery:
master:
tolerations:
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule
worker:
tolerations:
- key: "sku"
operator: Equal
value: "gpu"
effect: NoSchedule
- key: "kubernetes.azure.com/scalesetpriority"
operator: Equal
value: "spot"
effect: NoSchedule

View file

@ -13,10 +13,10 @@
NOTE: It may take a few minutes for the LoadBalancer IP to be available. NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}' You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "kokoro-fastapi.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "kokoro-fastapi.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }} echo http://$SERVICE_IP:{{ .Values.kokoroTTS.port }}
{{- else if contains "ClusterIP" .Values.service.type }} {{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "kokoro-fastapi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application" echo "Visit http://127.0.0.1:8880 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8880:$CONTAINER_PORT
{{- end }} {{- end }}

View file

@ -1,82 +1,43 @@
{{- if .Values.ingress.enabled -}} {{- if .Values.ingress.enabled -}}
{{- $fullName := include "kokoro-fastapi.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- $rewriteTargets := (list) -}}
{{- with .Values.ingress.host }}
{{- range .endpoints }}
{{- $serviceName := default $fullName .serviceName -}}
{{- $rewrite := .rewrite | default "none" -}}
{{- if not (has $rewrite $rewriteTargets ) -}}
{{- $rewriteTargets = append $rewriteTargets $rewrite -}}
{{- end -}}
{{- end}}
{{- end }}
{{- range $key := $rewriteTargets }}
{{- $expandedRewrite := regexReplaceAll "/(.*)$" $key "slash${1}" -}}
apiVersion: networking.k8s.io/v1 apiVersion: networking.k8s.io/v1
kind: Ingress kind: Ingress
metadata: metadata:
{{- if eq $key "none" }} name: {{ include "kokoro-fastapi.fullname" . }}
name: {{ $fullName }}
{{- else }}
name: {{ $fullName }}-{{ $expandedRewrite }}
{{- end }}
labels: labels:
{{- include "kokoro-fastapi.labels" $ | nindent 4 }} {{- include "kokoro-fastapi.labels" . | nindent 4 }}
{{- if ne $key "none" }} {{- with .Values.ingress.annotations }}
annotations: annotations:
nginx.ingress.kubernetes.io/rewrite-target: {{ regexReplaceAll "/$" $key "" }}/$2 {{- toYaml . | nindent 4 }}
{{- end }} {{- end }}
spec: spec:
{{- if $.Values.ingress.tls }} {{- with .Values.ingress.className }}
tls: ingressClassName: {{ . }}
{{- range $.Values.ingress.tls }} {{- end }}
- hosts: {{- if .Values.ingress.tls }}
{{- range .hosts }} tls:
- {{ . | quote }} {{- range .Values.ingress.tls }}
{{- end }} - hosts:
secretName: {{ .secretName }} {{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }} {{- end }}
{{- end }}
rules: rules:
{{- with $.Values.ingress.host }} {{- range .Values.ingress.hosts }}
- host: {{ .name | quote }} - host: {{ .host | quote }}
http: http:
paths: paths:
{{- range .endpoints }} {{- range .paths }}
{{- $serviceName := default $fullName .serviceName -}} - path: {{ .path }}
{{- $servicePort := default (print "http") .servicePort -}} {{- with .pathType }}
{{- if eq ( .rewrite | default "none" ) $key }} pathType: {{ . }}
{{- range .paths }}
{{- if not (contains "@" .) }}
{{- if eq $key "none" }}
- path: {{ . }}
{{- else }}
- path: {{ regexReplaceAll "(.*)/$" . "${1}" }}(/|$)(.*)
{{- end }}
pathType: Prefix
backend:
service:
name: "{{ $fullName }}-{{ $serviceName }}"
port:
number: {{ $servicePort }}
{{- else }}
{{- $path := . -}}
{{- $replicaCount := include "getServiceNameReplicaCount" (dict "global" $.Values "serviceName" $serviceName ) -}}
{{- range $count, $e := until ($replicaCount|int) }}
- path: {{ $path | replace "@" ( . | toString ) }}(/|$)(.*)
pathType: Prefix
backend:
service:
name: "{{ $fullName }}-{{ $serviceName }}-{{ . }}"
port:
number: {{ $servicePort }}
{{- end }}
{{- end }}
{{- end }} {{- end }}
backend:
service:
name: {{ include "kokoro-fastapi.fullname" $ }}-kokoro-tts-service
port:
number: {{ $.Values.kokoroTTS.port }}
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- end }}
---
{{- end }}
{{- end }} {{- end }}

View file

@ -20,7 +20,7 @@ spec:
labels: labels:
{{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }} {{- include "kokoro-fastapi.selectorLabels" . | nindent 8 }}
spec: spec:
{{- with .Values.images.imagePullSecrets }} {{- with .Values.kokoroTTS.imagePullSecrets }}
imagePullSecrets: imagePullSecrets:
{{- toYaml . | nindent 8 }} {{- toYaml . | nindent 8 }}
{{- end }} {{- end }}
@ -49,10 +49,16 @@ spec:
httpGet: httpGet:
path: /health path: /health
port: kokoro-tts-http port: kokoro-tts-http
initialDelaySeconds: 30
periodSeconds: 30
timeoutSeconds: 5
readinessProbe: readinessProbe:
httpGet: httpGet:
path: /health path: /health
port: kokoro-tts-http port: kokoro-tts-http
initialDelaySeconds: 30
periodSeconds: 30
timeoutSeconds: 5
resources: resources:
{{- toYaml .Values.kokoroTTS.resources | nindent 12 }} {{- toYaml .Values.kokoroTTS.resources | nindent 12 }}
volumeMounts: [] volumeMounts: []

View file

@ -11,5 +11,5 @@ spec:
- name: wget - name: wget
image: busybox image: busybox
command: ['wget'] command: ['wget']
args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.service.port }}'] args: ['{{ include "kokoro-fastapi.fullname" . }}:{{ .Values.kokoroTTS.port }}']
restartPolicy: Never restartPolicy: Never

View file

@ -1,12 +1,19 @@
# Default values for kokoro-fastapi. # Default values for kokoro-fastapi.
# This is a YAML-formatted file. # This is a YAML-formatted file.
# Declare variables to be passed into your templates. # Declare variables to be passed into your templates.
kokoroTTS:
replicaCount: 1 replicaCount: 1
# The name of the deployment repository
images: repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
pullPolicy: "Always" imagePullSecrets: [] # Set if using a private image or getting rate limited
imagePullSecrets: [ ] tag: "latest"
pullPolicy: Always
port: 8880
resources:
limits:
nvidia.com/gpu: 1
requests:
nvidia.com/gpu: 1
nameOverride: "" nameOverride: ""
fullnameOverride: "" fullnameOverride: ""
@ -38,47 +45,21 @@ service:
ingress: ingress:
enabled: false enabled: false
className: "" className: "nginx"
annotations: {} annotations: {}
# kubernetes.io/ingress.class: nginx # cert-manager.io/cluster-issuer: letsencrypt-prod
# kubernetes.io/tls-acme: "true" # external-dns.alpha.kubernetes.io/hostname: kokoro.example.com
host: # external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
name: kokoro.example.com hosts:
endpoints: - host: kokoro.example.com
- paths: paths:
- "/" - path: /
serviceName: "fastapi" pathType: Prefix
servicePort: 8880
tls: [] tls: []
# - secretName: chart-example-tls # - secretName: kokoro-fastapi-tls
# hosts: # hosts:
# - chart-example.local # - kokoro.example.com
kokoroTTS:
repository: "ghcr.io/remsky/kokoro-fastapi-gpu"
tag: "latest"
pullPolicy: Always
serviceName: "fastapi"
port: 8880
replicaCount: 1
resources:
limits:
nvidia.com/gpu: 1
requests:
nvidia.com/gpu: 1
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
autoscaling: autoscaling:
enabled: false enabled: false

26
dev/Test money.py Normal file
View file

@ -0,0 +1,26 @@
# Dev script: exercises money normalization end-to-end against a locally
# running Kokoro-FastAPI server and saves the synthesized audio to a file.
import requests
import base64  # NOTE(review): unused in this script — candidate for removal
import json  # NOTE(review): unused in this script — candidate for removal

# Sample text containing dollar amounts with multipliers ("million", "billion")
# to verify the money-normalization handling audibly.
text="""the administration has offered up a platter of repression for more than a year and is still slated to lose $400 million.
Columbia is the largest private landowner in New York City and boasts an endowment of $14.8 billion;"""

# Output container format, used both for the request and the saved filename.
Type="wav"

response = requests.post(
    "http://localhost:8880/v1/audio/speech",
    json={
        "model": "kokoro",
        "input": text,
        "voice": "af_heart+af_sky",  # blended voice
        "speed": 1.0,
        "response_format": Type,
        "stream": False,  # ask the server for a single non-streamed body
    },
    stream=True
)

# Write the complete audio payload to disk.
with open(f"outputnostreammoney.{Type}", "wb") as f:
    f.write(response.content)

45
dev/Test num.py Normal file
View file

@ -0,0 +1,45 @@
# Dev scratch script for prototyping the money-normalization regex and handler
# before porting them into api/src/.../normalization.py.
from text_to_num import text2num  # NOTE(review): unused here — consider removing
import re
import inflect
from torch import mul  # NOTE(review): unused — looks like an autocomplete artifact

# Shared inflect engine for number-to-words and pluralization.
INFLECT_ENGINE = inflect.engine()
def conditional_int(number: float, threshold: float = 0.00001):
    """Return *number* as an int when it lies within *threshold* of a whole number.

    Values that are not close to an integer pass through unchanged (as float),
    so callers can speak "5.0" as "five" but keep "5.3" fractional.
    """
    nearest = round(number)
    if abs(nearest - number) < threshold:
        return int(nearest)
    return number
def handle_money(m: re.Match[str]) -> str:
    """Convert a money match (e.g. "-$5.3 billion") to its spoken form.

    Expected match groups:
        1: optional leading "-"
        2: currency symbol ("$" or "£")
        3: numeric amount
        4: optional spelled-out multiplier (" hundred", " thousand", ...), may be ""

    Returns the original matched text unchanged if the amount fails to parse.
    """
    bill = "dollar" if m.group(2) == "$" else "pound"
    coin = "cent" if m.group(2) == "$" else "pence"
    number = m.group(3)
    multiplier = m.group(4)

    try:
        number = float(number)
    except ValueError:  # fixed: bare `except:` also swallowed KeyboardInterrupt/SystemExit
        return m.group()

    if m.group(1) == "-":
        number *= -1

    if number % 1 == 0 or multiplier != "":
        # Whole amount, or one with a multiplier word: no coin part is spoken.
        text_number = f"{INFLECT_ENGINE.number_to_words(conditional_int(number))}{multiplier} {INFLECT_ENGINE.plural(bill, count=number)}"
    else:
        # Fractional amount with no multiplier: split into units and a two-digit
        # coin part. ljust pads "5" -> "50" so $X.5 reads as fifty cents.
        sub_number = int(str(number).split(".")[-1].ljust(2, "0"))
        # fixed: int(number) truncates toward zero; the previous int(round(number))
        # rounded $50.6 up to "fifty-one dollars and sixty cents".
        text_number = f"{INFLECT_ENGINE.number_to_words(int(number))} {INFLECT_ENGINE.plural(bill, count=number)} and {INFLECT_ENGINE.number_to_words(sub_number)} {INFLECT_ENGINE.plural(coin, count=sub_number)}"
    return text_number
# Run the money regex + handler over a sample sentence and print the result.
# Capture groups feed handle_money: (1) optional "-", (2) currency symbol,
# (3) amount, (4) zero or more multiplier words.
# NOTE(review): the sample string starts with "he" — likely a truncated paste
# of "The"; harmless for this scratch test.
text = re.sub(
    r"(?i)(-?)([$£])(\d+(?:\.\d+)?)((?: hundred| thousand| (?:[bm]|tr|quadr)illion)*)\b",
    handle_money,
    "he administration has offered up a platter of repression for more than a year and is still slated to lose -$5.3 billion",
)

print(text)

View file

@ -38,6 +38,7 @@ dependencies = [
"inflect>=7.5.0", "inflect>=7.5.0",
"phonemizer-fork>=3.3.2", "phonemizer-fork>=3.3.2",
"av>=14.2.0", "av>=14.2.0",
"text2num>=2.5.1",
] ]
[project.optional-dependencies] [project.optional-dependencies]