def handle_phone_number(p: re.Match[str]) -> str:
    """Spell out a matched phone number digit by digit for TTS.

    Args:
        p: Match whose capture groups are, in order: optional country code
            (may carry a leading "+"), optional separator, area code
            (may be wrapped in parentheses), prefix, and line number.

    Returns:
        The spoken country code, area code, prefix and line number joined
        with commas. When no country code was captured the first element
        is an empty string, so the result starts with a comma.
    """
    groups = p.groups()

    country_code = ""
    if groups[0] is not None:
        # The "+" is dropped; the country code is read as one whole number.
        country_code = INFLECT_ENGINE.number_to_words(groups[0].replace("+", ""))

    # group=1 with an empty comma makes inflect read the digits one at a time.
    area_digits = groups[2].replace("(", "").replace(")", "")
    area_code = INFLECT_ENGINE.number_to_words(area_digits, group=1, comma="")
    telephone_prefix = INFLECT_ENGINE.number_to_words(groups[3], group=1, comma="")
    line_number = INFLECT_ENGINE.number_to_words(groups[4], group=1, comma="")

    # NOTE(review): the leading comma for numbers without a country code is
    # kept on purpose here - the calling regex can consume a separator in
    # front of the area code, and the comma then stands in for it.
    return ",".join([country_code, area_code, telephone_prefix, line_number])


def handle_time(t: re.Match[str]) -> str:
    """Spell out a matched clock time for TTS.

    Args:
        t: Match whose first capture group is the colon-separated time and
            whose third capture group, when it participated in the match,
            is a trailing suffix (presumably an am/pm marker - the pattern
            is defined outside this block; confirm against TIME_PATTERN).

    Returns:
        Every colon-separated field converted to words and joined with
        spaces, followed by the stripped suffix when one was captured.
    """
    groups = t.groups()

    spoken = " ".join(
        INFLECT_ENGINE.number_to_words(field.strip())
        for field in groups[0].split(":")
    )

    suffix = groups[2].strip() if groups[2] is not None else ""

    # Put a space before the suffix; plain concatenation glued it onto the
    # last spoken number (e.g. "thirtypm").
    return f"{spoken} {suffix}" if suffix else spoken
"'").replace(chr(8217), "'") text = text.replace("«", chr(8220)).replace("»", chr(8221)) @@ -243,6 +276,9 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str for a, b in zip("、。!,:;?–", ",.!,:;?-"): text = text.replace(a, b + " ") + # Handle simple time in the format of HH:MM:SS + text = TIME_PATTERN.sub(handle_time, text, ) + # Clean up whitespace text = re.sub(r"[^\S \n]", " ", text) text = re.sub(r" +", " ", text) diff --git a/api/src/structures/schemas.py b/api/src/structures/schemas.py index e233d61..22c28bc 100644 --- a/api/src/structures/schemas.py +++ b/api/src/structures/schemas.py @@ -43,6 +43,7 @@ class NormalizationOptions(BaseModel): url_normalization: bool = Field(default=True, description="Changes urls so they can be properly pronouced by kokoro") email_normalization: bool = Field(default=True, description="Changes emails so they can be properly pronouced by kokoro") optional_pluralization_normalization: bool = Field(default=True, description="Replaces (s) with s so some words get pronounced correctly") + phone_normalization: bool = Field(default=True, description="Changes phone numbers so they can be properly pronouced by kokoro") class OpenAISpeechRequest(BaseModel): """Request schema for OpenAI-compatible speech endpoint"""