2024-12-31 01:52:16 -07:00
from enum import Enum
2025-01-07 03:50:08 -07:00
from typing import Literal , Union , List
2024-12-31 02:55:51 -07:00
from pydantic import Field , BaseModel
2024-12-31 01:52:16 -07:00
2025-01-07 03:50:08 -07:00
class VoiceCombineRequest(BaseModel):
    """Request schema for the voice combination endpoint.

    The ``voices`` field accepts either a single string in which voice names
    are joined with ``+`` or a list of voice names.
    """

    # Required field (Ellipsis marks "no default"); both input shapes are
    # accepted by the Union annotation.
    voices: Union[str, List[str]] = Field(
        ...,
        description="Either a string with voices separated by + (e.g. 'voice1+voice2') or a list of voice names to combine",
    )
2024-12-31 01:52:16 -07:00
class TTSStatus(str, Enum):
    """Status values for TTS.

    Mixing in ``str`` makes every member a real string, so members compare
    equal to their plain-string values and can be looked up by value
    (``TTSStatus("pending")``).
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    DELETED = "deleted"  # For files removed by cleanup
# OpenAI-compatible schemas
class OpenAISpeechRequest(BaseModel):
    """Request body schema for the OpenAI-compatible speech endpoint.

    Only ``input`` is required; every other field has a default.
    """

    # Accepted model identifiers; defaults to "kokoro".
    model: Literal["tts-1", "tts-1-hd", "kokoro"] = "kokoro"
    input: str = Field(..., description="The text to generate audio for")
    voice: str = Field(
        default="af",
        description="The voice to use for generation. Can be a base voice or a combined voice name.",
    )
    # NOTE: "aac" is accepted by the type, but the description below states
    # that AAC is not currently supported.
    response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] = Field(
        default="mp3",
        description="The format to return audio in. Supported formats: mp3, opus, flac, wav, pcm. PCM format returns raw 16-bit samples without headers. AAC is not currently supported.",
    )
    # Bounded to the inclusive range [0.25, 4.0] via ge/le.
    speed: float = Field(
        default=1.0,
        ge=0.25,
        le=4.0,
        description="The speed of the generated audio. Select a value from 0.25 to 4.0.",
    )
    stream: bool = Field(
        default=True,  # Default to streaming for OpenAI compatibility
        description="If true (default), audio will be streamed as it's generated. Each chunk will be a complete sentence.",
    )