2025-01-03 17:54:17 -07:00
|
|
|
|
import re
|
|
|
|
|
from abc import ABC, abstractmethod
|
2025-01-09 18:41:44 -07:00
|
|
|
|
|
2025-01-03 17:54:17 -07:00
|
|
|
|
import phonemizer
|
2025-01-09 18:41:44 -07:00
|
|
|
|
|
2025-01-03 17:54:17 -07:00
|
|
|
|
from .normalizer import normalize_text
|
2025-01-21 14:45:43 -05:00
|
|
|
|
# Module-level cache of phonemizer backends, keyed by the language code passed
# to phonemize(); an entry is created the first time a language code is used.
phonemizers = {}
|
2025-01-09 18:41:44 -07:00
|
|
|
|
|
2025-01-03 17:54:17 -07:00
|
|
|
|
class PhonemizerBackend(ABC):
    """Interface that every phonemization backend must implement.

    Subclasses provide ``phonemize``; the base class cannot be
    instantiated directly because that method is abstract.
    """

    @abstractmethod
    def phonemize(self, text: str) -> str:
        """Turn a piece of text into its phoneme representation.

        Args:
            text: Text to convert to phonemes

        Returns:
            Phonemized text
        """
|
|
|
|
|
|
2025-01-09 18:41:44 -07:00
|
|
|
|
|
2025-01-03 17:54:17 -07:00
|
|
|
|
class EspeakBackend(PhonemizerBackend):
    """Phonemizer backend that delegates to espeak through ``phonemizer``.

    The raw espeak output is post-processed with a fixed series of
    substitutions before it is returned (see ``phonemize``).
    """

    # Literal substitutions, applied strictly in this order: the two
    # whole-word fixes first, then the single-symbol rewrites.
    _LITERAL_FIXES = (
        ("kəkˈoːɹoʊ", "kˈoʊkəɹoʊ"),
        ("kəkˈɔːɹəʊ", "kˈəʊkəɹəʊ"),
        ("ʲ", "j"),
        ("r", "ɹ"),
        ("x", "k"),
        ("ɬ", "l"),
    )
    # Zero-width position between a preceding [a-zɹː] symbol and "hˈʌndɹɪd".
    _BEFORE_HUNDRED = re.compile(r"(?<=[a-zɹː])(?=hˈʌndɹɪd)")
    # A " z" that is followed by punctuation, a space, or end of string.
    _DETACHED_Z = re.compile(r' z(?=[;:,.!?¡¿—…"«»"" ]|$)')
    # "ti" directly after "nˈaɪn" and not followed by a length mark.
    _NINETY_TI = re.compile(r"(?<=nˈaɪn)ti(?!ː)")

    def __init__(self, language: str):
        """Set up the underlying espeak backend.

        Args:
            language: Language code ('en-us' or 'en-gb')
        """
        self.language = language
        self.backend = phonemizer.backend.EspeakBackend(
            language=language, preserve_punctuation=True, with_stress=True
        )

    def phonemize(self, text: str) -> str:
        """Run espeak on ``text`` and clean up the resulting phonemes.

        Args:
            text: Text to convert to phonemes

        Returns:
            Phonemized text, stripped of surrounding whitespace
        """
        # The backend works on a list of utterances; only one is sent.
        results = self.backend.phonemize([text])
        phonemes = results[0] if results else ""

        # Special cases: literal replacements, order matters.
        for old, new in self._LITERAL_FIXES:
            phonemes = phonemes.replace(old, new)

        # Insert a space before "hˈʌndɹɪd" when it is glued to the
        # previous symbol, and re-attach a detached trailing " z".
        phonemes = self._BEFORE_HUNDRED.sub(" ", phonemes)
        phonemes = self._DETACHED_Z.sub("z", phonemes)

        # Language-specific rule: only applied for US English.
        if self.language == "en-us":
            phonemes = self._NINETY_TI.sub("di", phonemes)

        return phonemes.strip()
|
|
|
|
|
|
2025-01-09 18:41:44 -07:00
|
|
|
|
|
2025-01-03 17:54:17 -07:00
|
|
|
|
def create_phonemizer(language: str = "a") -> PhonemizerBackend:
    """Build the phonemizer backend for a short language code.

    Args:
        language: Language code ('a' for US English, 'b' for British English)

    Returns:
        Phonemizer backend instance

    Raises:
        ValueError: If ``language`` is not one of the supported codes
    """
    # Short codes accepted by this module -> espeak language codes.
    espeak_codes = {"a": "en-us", "b": "en-gb"}

    espeak_language = espeak_codes.get(language)
    if espeak_language is None:
        raise ValueError(f"Unsupported language code: {language}")

    return EspeakBackend(espeak_language)
|
|
|
|
|
|
2025-01-09 18:41:44 -07:00
|
|
|
|
|
2025-01-03 17:54:17 -07:00
|
|
|
|
def phonemize(text: str, language: str = "a", normalize: bool = True) -> str:
    """Phonemize text, reusing one cached backend per language code.

    Args:
        text: Text to convert to phonemes
        language: Language code ('a' for US English, 'b' for British English)
        normalize: Whether to normalize text before phonemization

    Returns:
        Phonemized text
    """
    prepared = normalize_text(text) if normalize else text

    # Backends are created on first use and kept in the module-level
    # ``phonemizers`` dict; the dict is only mutated, never rebound.
    backend = phonemizers.get(language)
    if backend is None:
        backend = create_phonemizer(language)
        phonemizers[language] = backend

    return backend.phonemize(prepared)
|