Added optional pluralization normalization (replaces "(s)" with "s")

This commit is contained in:
Fireblade 2025-02-11 19:24:29 -05:00
parent 09de389b29
commit 7cb5957848
2 changed files with 6 additions and 1 deletion

View file

@@ -229,6 +229,10 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
if normalization_options.unit_normalization:
text=UNIT_PATTERN.sub(handle_units,text)
# Replace optional pluralization
if normalization_options.optional_pluralization_normalization:
text = re.sub(r"\(s\)","s",text)
# Replace quotes and brackets
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
text = text.replace("«", chr(8220)).replace("»", chr(8221))
@@ -276,6 +280,6 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
text = re.sub(
r"(?:[A-Za-z]\.){2,} [a-z]", lambda m: m.group().replace(".", "-"), text
)
text = re.sub(r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)
text = re.sub( r"(?i)(?<=[A-Z])\.(?=[A-Z])", "-", text)
return text.strip()

View file

@@ -42,6 +42,7 @@ class NormalizationOptions(BaseModel):
unit_normalization: bool = Field(default=False,description="Transforms units like 10KB to 10 kilobytes")
url_normalization: bool = Field(default=True, description="Changes urls so they can be properly pronouced by kokoro")
email_normalization: bool = Field(default=True, description="Changes emails so they can be properly pronouced by kokoro")
optional_pluralization_normalization: bool = Field(default=True, description="Replaces (s) with s so some words get pronounced correctly")
class OpenAISpeechRequest(BaseModel):
"""Request schema for OpenAI-compatible speech endpoint"""