mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00

- Refactored TTS model initialization methods in tts_gpu.py and tts_cpu.py
- Added custom logger configuration in main.py
- Deprecated text_processing router -> development route
108 lines
3.2 KiB
Python
108 lines
3.2 KiB
Python
import requests
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Tuple, Optional
|
|
|
|
# Absolute path of the directory containing this script; main() creates its
# "output" folder beneath it.
SCRIPT_DIR = Path(__file__).parent.absolute()
|
|
|
|
def get_phonemes(
    text: str,
    language: str = "a",
    *,
    base_url: str = "http://localhost:8880",
    timeout: Optional[float] = 30.0,
) -> Tuple[str, list[int]]:
    """Get phonemes and tokens for input text.

    Posts ``text`` and ``language`` as a JSON body to the server's
    ``/text/phonemize`` endpoint and unpacks the ``phonemes`` and ``tokens``
    fields of the JSON reply.

    Args:
        text: Input text to convert to phonemes
        language: Language code (defaults to "a" for American English)
        base_url: Scheme, host and port of the API server; a trailing slash
            is tolerated. Defaults to the local server on port 8880.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.post``. ``None`` waits indefinitely.

    Returns:
        Tuple of (phonemes string, token list)

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If the server does not respond within ``timeout``.
        KeyError: If the reply lacks the "phonemes" or "tokens" field.
    """
    # NOTE(review): the "/text/" route prefix is hard-coded; confirm the
    # server still exposes the phonemize endpoint under it.
    endpoint = f"{base_url.rstrip('/')}/text/phonemize"
    payload = {"text": text, "language": language}

    # Without a timeout, requests would block forever on a stalled server.
    response = requests.post(endpoint, json=payload, timeout=timeout)

    # Surface 4xx/5xx replies as exceptions instead of parsing an error body.
    response.raise_for_status()

    result = response.json()
    return result["phonemes"], result["tokens"]
|
|
|
|
def generate_audio_from_phonemes(
    phonemes: str,
    voice: str = "af_bella",
    speed: float = 1.0,
    *,
    base_url: str = "http://localhost:8880",
    timeout: Optional[float] = 120.0,
) -> Optional[bytes]:
    """Generate audio from phonemes.

    Posts the phonemes, voice and speed as a JSON body to the server's
    ``/text/generate_from_phonemes`` endpoint and returns the raw response
    body.

    Args:
        phonemes: Phoneme string to synthesize
        voice: Voice ID to use (defaults to af_bella)
        speed: Speed factor (defaults to 1.0)
        base_url: Scheme, host and port of the API server; a trailing slash
            is tolerated. Defaults to the local server on port 8880.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.post``. ``None`` waits indefinitely.

    Returns:
        The response body as bytes (WAV audio, per the endpoint's contract).
        Failures are reported by raising, not by returning None; the body
        may still be empty, so callers should check it before writing.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # NOTE(review): the "/text/" route prefix is hard-coded; confirm the
    # server still exposes the generation endpoint under it.
    endpoint = f"{base_url.rstrip('/')}/text/generate_from_phonemes"
    payload = {"phonemes": phonemes, "voice": voice, "speed": speed}

    # Without a timeout, requests would block forever on a stalled server.
    response = requests.post(endpoint, json=payload, timeout=timeout)

    # Surface 4xx/5xx replies as exceptions instead of returning an error
    # body as if it were audio.
    response.raise_for_status()

    return response.content
|
|
|
|
def main():
    """Run the phoneme-to-audio demo over a few built-in sample texts.

    For each sample this prints the text, fetches and prints its phonemes
    and tokens, requests audio for the phonemes and, when audio comes back,
    writes it to ``output/example_<n>.wav`` next to this script. A failed
    request is printed and the loop moves on to the next sample.
    """
    sample_texts = [
        "Hello world! Welcome to the phoneme generation system.",
        "How are you today? I am doing reasonably well, thank you for asking",
        # NOTE(review): the apostrophe ending the second line is part of the
        # original text and looks like a typo; confirm before removing it.
        (
            "This is a test of the phoneme generation system. Do not be alarmed.\n"
            "This is only a test. If this were a real phoneme emergency, '\n"
            "you would be instructed to a phoneme shelter in your area."
        ),
    ]

    print("Generating phonemes and audio for example texts...\n")

    # Audio files land in an "output" folder beside the script.
    wav_dir = SCRIPT_DIR / "output"
    wav_dir.mkdir(exist_ok=True)

    for number, sample in enumerate(sample_texts, start=1):
        print(f"{len(sample)}: Input text: {sample}")
        try:
            phoneme_str, token_ids = get_phonemes(sample)
            print(f"{len(phoneme_str)} Phonemes: {phoneme_str}")
            print(f"{len(token_ids)} Tokens: {token_ids}")

            print("Generating audio...")
            wav_data = generate_audio_from_phonemes(phoneme_str)

            # An empty or missing body means there is nothing to save.
            if wav_data:
                wav_path = wav_dir / f"example_{number}.wav"
                wav_path.write_bytes(wav_data)
                print(f"Audio saved to: {wav_path}")

            print()
        except requests.RequestException as err:
            print(f"Error: {err}\n")
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|