mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
changes to how money and numbers are handled
This commit is contained in:
parent
b00c9ec28d
commit
3290bada2e
2 changed files with 9 additions and 8 deletions
|
@@ -136,10 +136,10 @@ def handle_money(m: re.Match[str]) -> str:
|
||||||
m = m.group()
|
m = m.group()
|
||||||
bill = "dollar" if m[0] == "$" else "pound"
|
bill = "dollar" if m[0] == "$" else "pound"
|
||||||
if m[-1].isalpha():
|
if m[-1].isalpha():
|
||||||
return f"{m[1:]} {bill}s"
|
return f"{INFLECT_ENGINE.number_to_words(m[1:])} {bill}s"
|
||||||
elif "." not in m:
|
elif "." not in m:
|
||||||
s = "" if m[1:] == "1" else "s"
|
s = "" if m[1:] == "1" else "s"
|
||||||
return f"{m[1:]} {bill}{s}"
|
return f"{INFLECT_ENGINE.number_to_words(m[1:])} {bill}{s}"
|
||||||
b, c = m[1:].split(".")
|
b, c = m[1:].split(".")
|
||||||
s = "" if b == "1" else "s"
|
s = "" if b == "1" else "s"
|
||||||
c = int(c.ljust(2, "0"))
|
c = int(c.ljust(2, "0"))
|
||||||
|
@@ -148,7 +148,7 @@ def handle_money(m: re.Match[str]) -> str:
|
||||||
if m[0] == "$"
|
if m[0] == "$"
|
||||||
else ("penny" if c == 1 else "pence")
|
else ("penny" if c == 1 else "pence")
|
||||||
)
|
)
|
||||||
return f"{b} {bill}{s} and {c} {coins}"
|
return f"{INFLECT_ENGINE.number_to_words(b)} {bill}{s} and {INFLECT_ENGINE.number_to_words(c)} {coins}"
|
||||||
|
|
||||||
|
|
||||||
def handle_decimal(num: re.Match[str]) -> str:
|
def handle_decimal(num: re.Match[str]) -> str:
|
||||||
|
@@ -259,17 +259,18 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
|
||||||
text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text)
|
text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text)
|
||||||
|
|
||||||
# Handle numbers and money
|
# Handle numbers and money
|
||||||
text = re.sub(
|
|
||||||
r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)", split_num, text
|
|
||||||
)
|
|
||||||
|
|
||||||
text = re.sub(r"(?<=\d),(?=\d)", "", text)
|
text = re.sub(r"(?<=\d),(?=\d)", "", text)
|
||||||
|
|
||||||
text = re.sub(
|
text = re.sub(
|
||||||
r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b",
|
r"(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b",
|
||||||
handle_money,
|
handle_money,
|
||||||
text,
|
text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
text = re.sub(
|
||||||
|
r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)", split_num, text
|
||||||
|
)
|
||||||
|
|
||||||
text = re.sub(r"\d*\.\d+", handle_decimal, text)
|
text = re.sub(r"\d*\.\d+", handle_decimal, text)
|
||||||
|
|
||||||
# Handle various formatting
|
# Handle various formatting
|
||||||
|
|
|
@@ -88,4 +88,4 @@ def test_non_url_text():
|
||||||
"""Test that non-URL text is unaffected"""
|
"""Test that non-URL text is unaffected"""
|
||||||
assert normalize_text("This is not.a.url text",normalization_options=NormalizationOptions()) == "This is not-a-url text"
|
assert normalize_text("This is not.a.url text",normalization_options=NormalizationOptions()) == "This is not-a-url text"
|
||||||
assert normalize_text("Hello, how are you today?",normalization_options=NormalizationOptions()) == "Hello, how are you today?"
|
assert normalize_text("Hello, how are you today?",normalization_options=NormalizationOptions()) == "Hello, how are you today?"
|
||||||
assert normalize_text("It costs $50.",normalization_options=NormalizationOptions()) == "It costs 50 dollars."
|
assert normalize_text("It costs $50.",normalization_options=NormalizationOptions()) == "It costs fifty dollars."
|
||||||
|
|
Loading…
Add table
Reference in a new issue