diff --git a/.gitignore b/.gitignore index af08bc6..351aee1 100644 --- a/.gitignore +++ b/.gitignore @@ -54,6 +54,7 @@ examples/phoneme_examples/output/example_3.wav Kokoro-82M/ ui/data/ EXTERNAL_UV_DOCUMENTATION* +app # Docker Dockerfile* diff --git a/api/src/builds/kokoro.py b/api/src/builds/kokoro.py deleted file mode 100644 index 4b6ff0c..0000000 --- a/api/src/builds/kokoro.py +++ /dev/null @@ -1,151 +0,0 @@ -import re - -import phonemizer -import torch - - -def split_num(num): - num = num.group() - if '.' in num: - return num - elif ':' in num: - h, m = [int(n) for n in num.split(':')] - if m == 0: - return f"{h} o'clock" - elif m < 10: - return f'{h} oh {m}' - return f'{h} {m}' - year = int(num[:4]) - if year < 1100 or year % 1000 < 10: - return num - left, right = num[:2], int(num[2:4]) - s = 's' if num.endswith('s') else '' - if 100 <= year % 1000 <= 999: - if right == 0: - return f'{left} hundred{s}' - elif right < 10: - return f'{left} oh {right}{s}' - return f'{left} {right}{s}' - -def flip_money(m): - m = m.group() - bill = 'dollar' if m[0] == '$' else 'pound' - if m[-1].isalpha(): - return f'{m[1:]} {bill}s' - elif '.' not in m: - s = '' if m[1:] == '1' else 's' - return f'{m[1:]} {bill}{s}' - b, c = m[1:].split('.') - s = '' if b == '1' else 's' - c = int(c.ljust(2, '0')) - coins = f"cent{'' if c == 1 else 's'}" if m[0] == '$' else ('penny' if c == 1 else 'pence') - return f'{b} {bill}{s} and {c} {coins}' - -def point_num(num): - a, b = num.group().split('.') - return ' point '.join([a, ' '.join(b)]) - -def normalize_text(text): - text = text.replace(chr(8216), "'").replace(chr(8217), "'") - text = text.replace('«', chr(8220)).replace('»', chr(8221)) - text = text.replace(chr(8220), '"').replace(chr(8221), '"') - text = text.replace('(', '«').replace(')', '»') - for a, b in zip('、。!,:;?', ',.!,:;?'): - text = text.replace(a, b+' ') - text = re.sub(r'[^\S \n]', ' ', text) - text = re.sub(r' +', ' ', text) - text = re.sub(r'(?<=\n) +(?=\n)', '', text) - text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text) - text = re.sub(r'\b(?:Mr\.|MR\.(?= [A-Z]))', 'Mister', text) - text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text) - text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text) - text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text) - text = re.sub(r'(?i)\b(y)eah?\b', r"\1e'a", text) - text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(? 510: - tokens = tokens[:510] - print('Truncated to 510 tokens') - ref_s = voicepack[len(tokens)] - out = forward(model, tokens, ref_s, speed) - ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens) - return out, ps diff --git a/api/src/core/kokoro.py b/api/src/core/kokoro.py deleted file mode 100644 index 8b65b9b..0000000 --- a/api/src/core/kokoro.py +++ /dev/null @@ -1,185 +0,0 @@ -import re - -import phonemizer -import torch - - -def split_num(num): - num = num.group() - if "." in num: - return num - elif ":" in num: - h, m = [int(n) for n in num.split(":")] - if m == 0: - return f"{h} o'clock" - elif m < 10: - return f"{h} oh {m}" - return f"{h} {m}" - year = int(num[:4]) - if year < 1100 or year % 1000 < 10: - return num - left, right = num[:2], int(num[2:4]) - s = "s" if num.endswith("s") else "" - if 100 <= year % 1000 <= 999: - if right == 0: - return f"{left} hundred{s}" - elif right < 10: - return f"{left} oh {right}{s}" - return f"{left} {right}{s}" - - -def flip_money(m): - m = m.group() - bill = "dollar" if m[0] == "$" else "pound" - if m[-1].isalpha(): - return f"{m[1:]} {bill}s" - elif "." not in m: - s = "" if m[1:] == "1" else "s" - return f"{m[1:]} {bill}{s}" - b, c = m[1:].split(".") - s = "" if b == "1" else "s" - c = int(c.ljust(2, "0")) - coins = ( - f"cent{'' if c == 1 else 's'}" - if m[0] == "$" - else ("penny" if c == 1 else "pence") - ) - return f"{b} {bill}{s} and {c} {coins}" - - -def point_num(num): - a, b = num.group().split(".") - return " point ".join([a, " ".join(b)]) - - -def normalize_text(text): - text = text.replace(chr(8216), "'").replace(chr(8217), "'") - text = text.replace("«", chr(8220)).replace("»", chr(8221)) - text = text.replace(chr(8220), '"').replace(chr(8221), '"') - text = text.replace("(", "«").replace(")", "»") - for a, b in zip("、。!,:;?", ",.!,:;?"): - text = text.replace(a, b + " ") - text = re.sub(r"[^\S \n]", " ", text) - text = re.sub(r" +", " ", text) - text = re.sub(r"(?<=\n) +(?=\n)", "", text) - text = re.sub(r"\bD[Rr]\.(?= [A-Z])", "Doctor", text) - text = re.sub(r"\b(?:Mr\.|MR\.(?= [A-Z]))", "Mister", text) - text = re.sub(r"\b(?:Ms\.|MS\.(?= [A-Z]))", "Miss", text) - text = re.sub(r"\b(?:Mrs\.|MRS\.(?= [A-Z]))", "Mrs", text) - text = re.sub(r"\betc\.(?! [A-Z])", "etc", text) - text = re.sub(r"(?i)\b(y)eah?\b", r"\1e'a", text) - text = re.sub( - r"\d*\.\d+|\b\d{4}s?\b|(? 510: - tokens = tokens[:510] - print("Truncated to 510 tokens") - ref_s = voicepack[len(tokens)] - out = forward(model, tokens, ref_s, speed) - ps = "".join(next(k for k, v in VOCAB.items() if i == v) for i in tokens) - return out, ps diff --git a/pyproject.toml b/pyproject.toml index 68434e1..7f91bce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ gpu = [ "torch==2.5.1+cu121", ] cpu = [ - "torch==2.5.1+cpu", + "torch==2.5.1", ] test = [ "pytest==8.0.0", diff --git a/ui/lib/config.py b/ui/lib/config.py index 406cdf0..1e6cfe8 100644 --- a/ui/lib/config.py +++ b/ui/lib/config.py @@ -6,10 +6,11 @@ API_PORT = os.getenv("API_PORT", "8880") API_URL = f"http://{API_HOST}:{API_PORT}" # File paths -INPUTS_DIR = "/app/ui/data/inputs" -OUTPUTS_DIR = "/app/ui/data/outputs" +INPUTS_DIR = "app/ui/data/inputs" +OUTPUTS_DIR = "app/ui/data/outputs" # Create directories if they don't exist + os.makedirs(INPUTS_DIR, exist_ok=True) os.makedirs(OUTPUTS_DIR, exist_ok=True)