2024-12-31 01:52:16 -07:00
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
from typing import Literal
|
|
|
|
from enum import Enum
|
|
|
|
|
|
|
|
|
|
|
|
class TTSStatus(str, Enum):
|
|
|
|
PENDING = "pending"
|
|
|
|
PROCESSING = "processing"
|
|
|
|
COMPLETED = "completed"
|
|
|
|
FAILED = "failed"
|
|
|
|
DELETED = "deleted" # For files removed by cleanup
|
|
|
|
|
|
|
|
|
|
|
|
# OpenAI-compatible schemas
|
|
|
|
class OpenAISpeechRequest(BaseModel):
|
|
|
|
model: Literal["tts-1", "tts-1-hd"] = "tts-1"
|
|
|
|
input: str = Field(..., description="The text to generate audio for")
|
2024-12-31 01:57:00 -07:00
|
|
|
voice: Literal[
|
|
|
|
"am_adam",
|
|
|
|
"am_michael",
|
|
|
|
"bm_lewis",
|
|
|
|
"af",
|
|
|
|
"bm_george",
|
|
|
|
"bf_isabella",
|
|
|
|
"bf_emma",
|
|
|
|
"af_sarah",
|
|
|
|
"af_bella",
|
|
|
|
] = Field(default="af", description="The voice to use for generation")
|
2024-12-31 01:52:16 -07:00
|
|
|
response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] = Field(
|
|
|
|
default="mp3",
|
2024-12-31 01:57:00 -07:00
|
|
|
description="The format to return audio in. Supported formats: mp3, opus, flac, wav. AAC and PCM are not currently supported.",
|
2024-12-31 01:52:16 -07:00
|
|
|
)
|
|
|
|
speed: float = Field(
|
|
|
|
default=1.0,
|
|
|
|
ge=0.25,
|
|
|
|
le=4.0,
|
2024-12-31 01:57:00 -07:00
|
|
|
description="The speed of the generated audio. Select a value from 0.25 to 4.0.",
|
2024-12-31 01:52:16 -07:00
|
|
|
)
|