Kokoro-FastAPI/examples/test_tts.py

#!/usr/bin/env python3
import argparse
import requests
import time
import sys
import os
from typing import Optional, Tuple


def get_voices(
    base_url: str = "http://localhost:8880",
    timeout: float = 30.0,
) -> Optional[Tuple[list[str], str]]:
    """Fetch the available voices and the server's default voice.

    Sends ``GET {base_url}/tts/voices`` and reads the ``"voices"`` and
    ``"default"`` keys from the JSON body.

    Args:
        base_url: Root URL of the TTS API; ``/tts/voices`` is appended to it.
        timeout: Passed to ``requests`` as the connect/read timeout, in
            seconds, so an unresponsive server cannot hang the CLI forever.

    Returns:
        ``(voices, default_voice)`` on success, or ``None`` when the request
        fails, the status is not 200, or the body does not have the expected
        shape. A message is printed in every failure case.
    """
    try:
        response = requests.get(f"{base_url}/tts/voices", timeout=timeout)
        if response.status_code != 200:
            # Report the server's answer, as the other request helpers do.
            print(f"Error getting voices: {response.text}")
            return None
        data = response.json()
        return data["voices"], data["default"]
    except requests.exceptions.RequestException as e:
        print(f"Error getting voices: {e}")
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON. KeyError/TypeError: JSON of the wrong shape.
        print(f"Error getting voices: unexpected response ({e!r})")
    return None


def submit_tts_request(
    text: str,
    voice: Optional[str] = None,
    speed: Optional[float] = 1.0,
    base_url: str = "http://localhost:8880",
    timeout: float = 30.0,
) -> Optional[int]:
    """Submit a TTS request and return the request ID.

    Sends ``POST {base_url}/tts`` with a JSON payload containing ``text`` and
    ``speed``, plus ``voice`` when a non-empty voice is given.

    Args:
        text: Text to synthesize.
        voice: Voice name; omitted from the payload when falsy.
        speed: Speech speed forwarded to the server as-is.
        base_url: Root URL of the TTS API; ``/tts`` is appended to it.
        timeout: Passed to ``requests`` as the connect/read timeout, in seconds.

    Returns:
        The ``"request_id"`` value from the response body, or ``None`` when
        the request fails, the status is not 200, or the body lacks that key.
        A message is printed in every failure case.
    """
    payload = {"text": text, "speed": speed}
    if voice:
        payload["voice"] = voice

    try:
        response = requests.post(f"{base_url}/tts", json=payload, timeout=timeout)
        if response.status_code != 200:
            print(f"Error submitting request: {response.text}")
            return None
        return response.json()["request_id"]
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
    except (ValueError, KeyError, TypeError) as e:
        # A 200 response without a usable request_id is still a failed submission.
        print(f"Error: unexpected response ({e!r})")
    return None


def check_request_status(
    request_id: int, base_url: str = "http://localhost:8880", timeout: float = 30.0
) -> Optional[dict]:
    """Fetch the current status record of a TTS request.

    Sends ``GET {base_url}/tts/{request_id}``.

    Args:
        request_id: ID of the request to look up.
        base_url: Root URL of the TTS API.
        timeout: Passed to ``requests`` as the connect/read timeout, in
            seconds. This call is polled in a loop, so a stalled connection
            would otherwise block the caller indefinitely.

    Returns:
        The decoded JSON body on a 200 response, otherwise ``None`` after
        printing an error message.
    """
    try:
        response = requests.get(f"{base_url}/tts/{request_id}", timeout=timeout)
        if response.status_code != 200:
            print(f"Error checking status: {response.text}")
            return None
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body.
        print(f"Error: invalid JSON in status response ({e})")
    return None


def _filename_from_content_disposition(header: str) -> str:
    """Return a bare, safe filename from a Content-Disposition value, or ""."""
    if not header:
        return ""

    # Local import: only this helper needs the stdlib header parser.
    from email.message import Message

    message = Message()
    message["Content-Disposition"] = header
    name = message.get_filename()
    if not name:
        return ""

    # The name comes from the server and is untrusted: keep only the final
    # path component so it cannot escape the output directory.
    name = os.path.basename(name.replace("\\", "/"))
    if name in ("", ".", ".."):
        return ""
    return name


def download_audio(
    request_id: int, base_url: str = "http://localhost:8880", timeout: float = 30.0
) -> Optional[str]:
    """Download the generated audio and save it next to this script.

    Sends ``GET {base_url}/tts/file/{request_id}`` and writes the response
    body into an ``output`` directory beside this file. The filename is taken
    from the ``Content-Disposition`` header when it carries one, otherwise
    ``speech_{request_id}.wav`` is used.

    Args:
        request_id: ID of the completed request.
        base_url: Root URL of the TTS API.
        timeout: Passed to ``requests`` as the connect/read timeout, in seconds.

    Returns:
        The path of the saved file, or ``None`` when the download or the
        write fails. A message is printed in every failure case.
    """
    try:
        response = requests.get(f"{base_url}/tts/file/{request_id}", timeout=timeout)
        if response.status_code != 200:
            print("Error downloading file")
            return None

        filename = _filename_from_content_disposition(
            response.headers.get("content-disposition", "")
        )
        if not filename:
            filename = f"speech_{request_id}.wav"

        output_dir = os.path.join(os.path.dirname(__file__), "output")
        os.makedirs(output_dir, exist_ok=True)
        filepath = os.path.join(output_dir, filename)
        with open(filepath, "wb") as f:
            f.write(response.content)
        return filepath
    except requests.exceptions.RequestException as e:
        # Must come first: RequestException is itself an OSError subclass.
        print(f"Error: {e}")
    except OSError as e:
        print(f"Error saving file: {e}")
    return None


def generate_speech(
    text: str,
    voice: Optional[str] = None,
    speed: Optional[float] = 1.0,
    base_url: str = "http://localhost:8880",
    download: bool = True,
    poll_interval: float = 1.0,
    max_wait: Optional[float] = None,
) -> bool:
    """Generate speech from text and optionally download the result.

    Submits the request, polls its status until it is ``"completed"`` or
    ``"failed"``, prints progress along the way, and downloads the audio
    when ``download`` is true.

    Args:
        text: Text to synthesize.
        voice: Voice name, or ``None`` to let the server choose.
        speed: Speech speed forwarded to the server.
        base_url: Root URL of the TTS API.
        download: Whether to fetch the audio file once generation completes.
        poll_interval: Seconds to sleep between status checks.
        max_wait: Maximum number of seconds to keep polling; ``None`` (the
            default) polls until the server reports a final state.

    Returns:
        ``True`` when generation completed (and, if requested, the file was
        saved); ``False`` on any failure or when ``max_wait`` elapses.
    """
    print("Submitting request...")
    request_id = submit_tts_request(text, voice, speed, base_url)
    # Only None signals a failed submission; an ID of 0 is falsy but not an error.
    if request_id is None:
        return False

    print(f"Request submitted (ID: {request_id})")

    deadline = None if max_wait is None else time.monotonic() + max_wait

    # Poll for completion
    while True:
        status = check_request_status(request_id, base_url)
        if not status:
            return False

        # .get() keeps a status record with missing fields from raising KeyError.
        state = status.get("status")

        if state == "completed":
            print("Generation complete!")
            processing_time = status.get("processing_time")
            if processing_time:
                print(f"Processing time: {processing_time:.2f}s")

            # Show output file path (clean up any relative path components)
            output_file = status.get("output_file")
            if output_file:
                output_file = os.path.normpath(output_file)
            print(f"Output file: {output_file}")

            if not download:
                return True

            print("Downloading file...")
            filepath = download_audio(request_id, base_url)
            if filepath:
                print(f"Saved to: {filepath}")
                return True
            return False

        if state == "failed":
            print("Generation failed")
            return False

        if deadline is not None and time.monotonic() >= deadline:
            print("Timed out waiting for generation to finish")
            return False

        print(".", end="", flush=True)
        time.sleep(poll_interval)


def list_available_voices(url: str):
    """Print each voice offered by the server at ``url``, marking the default.

    Prints ``Error getting voices`` instead when the lookup yields nothing.
    """
    result = get_voices(url)
    if not result:
        print("Error getting voices")
        return

    names, default_name = result
    print("Available voices:")
    for name in names:
        line = f"  {name} (default)" if name == default_name else f"  {name}"
        print(line)


def main():
    """Command-line entry point for the Kokoro TTS example client.

    With no ``text`` argument the available voices are listed and the function
    returns. Otherwise speech is generated for ``text``, and the process exits
    with status 1 if generation (or the optional download) fails.
    """
    parser = argparse.ArgumentParser(description="Kokoro TTS CLI")
    parser.add_argument("text", nargs="?", help="Text to convert to speech")
    parser.add_argument("--voice", help="Voice to use")
    # type=float: without it a user-supplied value stays a string and is sent
    # to the API as a JSON string rather than a number.
    parser.add_argument("--speed", type=float, default=1.0, help="speed of speech")
    parser.add_argument("--url", default="http://localhost:8880", help="API base URL")
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")
    parser.add_argument(
        "--no-download",
        action="store_true",
        help="Don't download the file, just show the filepath",
    )
    args = parser.parse_args()

    if args.debug:
        print(f"Debug: Arguments received: {args}")

    # If no text provided, just list voices
    if not args.text:
        list_available_voices(args.url)
        return

    # Generate speech
    print(f"Generating speech for: {args.text}")
    if args.voice:
        print(f"Using voice: {args.voice}")

    if args.debug:
        print(
            f"Debug: Calling generate_speech with text='{args.text}', voice='{args.voice}'"
        )

    success = generate_speech(
        args.text, args.voice, args.speed, args.url, download=not args.no_download
    )
    if not success:
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`#!/usr/bin/env python3`
			`import argparse`
			`import requests`
			`import time`
			`import sys`
			`import os`
			`from typing import Optional, Tuple`


			`def get_voices(`
			`base_url: str = "http://localhost:8880",`
			`) -> Optional[Tuple[list[str], str]]:`
			`"""Get list of available voices and default voice"""`
			`try:`
			`response = requests.get(f"{base_url}/tts/voices")`
			`if response.status_code == 200:`
			`data = response.json()`
			`return data["voices"], data["default"]`
			`except requests.exceptions.RequestException as e:`
			`print(f"Error getting voices: {e}")`
			`return None`


			`def submit_tts_request(`
add speed 2024-12-30 13:39:35 -05:00			`text: str, voice: Optional[str] = None, speed: Optional[float] = 1.0, base_url: str = "http://localhost:8880"`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`) -> Optional[int]:`
			`"""Submit a TTS request and return the request ID"""`
			`try:`
add speed 2024-12-30 13:39:35 -05:00			`payload = {"text": text, "speed": speed, "voice": voice} if voice else {"text": text, "speed": speed}`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`response = requests.post(f"{base_url}/tts", json=payload)`
			`if response.status_code != 200:`
			`print(f"Error submitting request: {response.text}")`
			`return None`
			`return response.json()["request_id"]`
			`except requests.exceptions.RequestException as e:`
			`print(f"Error: {e}")`
			`return None`


			`def check_request_status(`
			`request_id: int, base_url: str = "http://localhost:8880"`
			`) -> Optional[dict]:`
			`"""Check the status of a request"""`
			`try:`
			`response = requests.get(f"{base_url}/tts/{request_id}")`
			`if response.status_code != 200:`
			`print(f"Error checking status: {response.text}")`
			`return None`
			`return response.json()`
			`except requests.exceptions.RequestException as e:`
			`print(f"Error: {e}")`
			`return None`


			`def download_audio(`
			`request_id: int, base_url: str = "http://localhost:8880"`
			`) -> Optional[str]:`
			`"""Download and save the generated audio file. Returns the filepath if successful."""`
			`try:`
			`response = requests.get(f"{base_url}/tts/file/{request_id}")`
			`if response.status_code != 200:`
			`print("Error downloading file")`
			`return None`

			`filename = (`
			`response.headers.get("content-disposition", "")`
			`.split("filename=")[-1]`
			`.strip('"')`
			`)`
			`if not filename:`
			`filename = f"speech_{request_id}.wav"`

			`filepath = os.path.join(os.path.dirname(__file__), "output", filename)`
			`os.makedirs(os.path.dirname(filepath), exist_ok=True)`
			`with open(filepath, "wb") as f:`
			`f.write(response.content)`
			`return filepath`
			`except requests.exceptions.RequestException as e:`
			`print(f"Error: {e}")`
			`return None`


			`def generate_speech(`
			`text: str,`
			`voice: Optional[str] = None,`
add speed 2024-12-30 13:39:35 -05:00			`speed: Optional[float] = 1.0,`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`base_url: str = "http://localhost:8880",`
			`download: bool = True,`
			`) -> bool:`
			`"""Generate speech from text"""`
			`# Submit request`
			`print("Submitting request...")`
add speed 2024-12-30 13:39:35 -05:00			`request_id = submit_tts_request(text, voice, speed, base_url)`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`if not request_id:`
			`return False`

			`print(f"Request submitted (ID: {request_id})")`

			`# Poll for completion`
			`while True:`
			`status = check_request_status(request_id, base_url)`
			`if not status:`
			`return False`

			`if status["status"] == "completed":`
			`print("Generation complete!")`
			`if status["processing_time"]:`
			`print(f"Processing time: {status['processing_time']:.2f}s")`

			`# Show output file path (clean up any relative path components)`
			`output_file = status["output_file"]`
			`if output_file:`
			`output_file = os.path.normpath(output_file)`
			`print(f"Output file: {output_file}")`

			`# Download if requested`
			`if download:`
			`print("Downloading file...")`
			`filepath = download_audio(request_id, base_url)`
			`if filepath:`
			`print(f"Saved to: {filepath}")`
			`return True`
			`return False`
			`return True`

			`elif status["status"] == "failed":`
			`print("Generation failed")`
			`return False`

			`print(".", end="", flush=True)`
			`time.sleep(1)`


			`def list_available_voices(url: str):`
			`"""List all available voices"""`
			`voices = get_voices(url)`
			`if voices:`
			`voices_list, default_voice = voices`
			`print("Available voices:")`
			`for voice in voices_list:`
			`if voice == default_voice:`
			`print(f" {voice} (default)")`
			`else:`
			`print(f" {voice}")`
			`else:`
			`print("Error getting voices")`


			`def main():`
			`parser = argparse.ArgumentParser(description="Kokoro TTS CLI")`
			`parser.add_argument("text", nargs="?", help="Text to convert to speech")`
			`parser.add_argument("--voice", help="Voice to use")`
add speed 2024-12-30 13:39:35 -05:00			`parser.add_argument("--speed", default=1.0, help="speed of speech")`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`parser.add_argument("--url", default="http://localhost:8880", help="API base URL")`
			`parser.add_argument("--debug", action="store_true", help="Enable debug logging")`
			`parser.add_argument(`
			`"--no-download",`
			`action="store_true",`
			`help="Don't download the file, just show the filepath",`
			`)`
			`args = parser.parse_args()`

			`if args.debug:`
			`print(f"Debug: Arguments received: {args}")`

			`# If no text provided, just list voices`
			`if not args.text:`
			`list_available_voices(args.url)`
			`return`

			`# Generate speech`
			`print(f"Generating speech for: {args.text}")`
			`if args.voice:`
			`print(f"Using voice: {args.voice}")`

			`if args.debug:`
			`print(`
			`f"Debug: Calling generate_speech with text='{args.text}', voice='{args.voice}'"`
			`)`

			`success = generate_speech(`
add speed 2024-12-30 13:39:35 -05:00			`args.text, args.voice, args.speed, args.url, download=not args.no_download`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`)`
			`if not success:`
			`sys.exit(1)`


			`if __name__ == "__main__":`
			`main()`