2024-12-31 10:30:12 -05:00
|
|
|
from typing import List
|
|
|
|
|
2024-12-31 02:55:51 -07:00
|
|
|
from loguru import logger
|
2025-01-01 21:50:41 -07:00
|
|
|
from fastapi import Depends, Response, APIRouter, HTTPException
|
2024-12-31 02:55:51 -07:00
|
|
|
|
2025-01-03 00:53:41 -07:00
|
|
|
from ..services.tts_service import TTSService
|
2025-01-01 21:50:41 -07:00
|
|
|
from ..services.audio import AudioService
|
2024-12-31 02:55:51 -07:00
|
|
|
from ..structures.schemas import OpenAISpeechRequest
|
2024-12-31 01:52:16 -07:00
|
|
|
|
|
|
|
# Router collecting the OpenAI-compatible TTS endpoints declared in this module.
# Every route is tagged "OpenAI Compatible TTS" and documents a 404 response.
router = APIRouter(
    responses={404: {"description": "Not found"}},
    tags=["OpenAI Compatible TTS"],
)
|
|
|
|
|
2024-12-31 01:57:00 -07:00
|
|
|
|
2024-12-31 01:52:16 -07:00
|
|
|
def get_tts_service() -> TTSService:
    """Dependency provider used with ``Depends`` to supply a TTSService.

    Returns:
        A TTSService constructed with no arguments; each call builds a new
        instance.
    """
    # No arguments are passed, so the service uses its own defaults.
    service = TTSService()
    return service
|
2024-12-31 01:57:00 -07:00
|
|
|
|
2024-12-31 01:52:16 -07:00
|
|
|
|
|
|
|
@router.post("/audio/speech")
async def create_speech(
    request: OpenAISpeechRequest, tts_service: TTSService = Depends(get_tts_service)
):
    """OpenAI-compatible endpoint for text-to-speech.

    Checks that ``request.voice`` is among ``tts_service.list_voices()``,
    generates audio for ``request.input``, converts it to
    ``request.response_format`` and returns the result as a downloadable file.

    Args:
        request: Speech request; ``input``, ``voice``, ``speed`` and
            ``response_format`` are read from it.
        tts_service: Service used to list voices and generate the audio.

    Returns:
        Response whose body is the converted audio, with an ``audio/*`` media
        type and a ``Content-Disposition`` header naming the file
        ``speech.<response_format>``.

    Raises:
        HTTPException:
            - 400: the requested voice is not available, or any other
              ValueError was raised while handling the request
            - 500: any other exception
    """
    try:
        # Validate voice exists
        available_voices = tts_service.list_voices()
        if request.voice not in available_voices:
            raise ValueError(
                f"Voice '{request.voice}' not found. Available voices: {', '.join(sorted(available_voices))}"
            )

        # Generate audio directly using TTSService's method; the second
        # element of the returned pair is not needed here.
        audio, _ = tts_service._generate_audio(
            text=request.input,
            voice=request.voice,
            speed=request.speed,
            stitch_long_output=True,
        )

        # Convert to requested format
        # NOTE(review): 24000 is presumably the sample rate in Hz -- confirm
        # against AudioService.convert_audio.
        content = AudioService.convert_audio(audio, 24000, request.response_format)

        # MP3's registered media type is "audio/mpeg" (RFC 3003), not
        # "audio/mp3"; every other format keeps the "audio/<format>" form.
        if request.response_format == "mp3":
            media_type = "audio/mpeg"
        else:
            media_type = f"audio/{request.response_format}"

        return Response(
            content=content,
            media_type=media_type,
            headers={
                "Content-Disposition": f"attachment; filename=speech.{request.response_format}"
            },
        )

    except ValueError as e:
        logger.error(f"Invalid request: {str(e)}")
        raise HTTPException(
            status_code=400, detail={"error": "Invalid request", "message": str(e)}
        ) from e
    except Exception as e:
        logger.error(f"Error generating speech: {str(e)}")
        raise HTTPException(
            status_code=500, detail={"error": "Server error", "message": str(e)}
        ) from e
|
2024-12-31 01:52:16 -07:00
|
|
|
|
2024-12-31 01:57:00 -07:00
|
|
|
|
2024-12-31 01:52:16 -07:00
|
|
|
@router.get("/audio/voices")
async def list_voices(tts_service: TTSService = Depends(get_tts_service)):
    """Report the voices the TTS service makes available.

    Args:
        tts_service: Service whose ``list_voices()`` result is returned.

    Returns:
        A dict with a single ``"voices"`` key holding that result.

    Raises:
        HTTPException: 500, with the error text as ``detail``, if listing
            the voices raises any exception.
    """
    try:
        payload = {"voices": tts_service.list_voices()}
    except Exception as e:
        logger.error(f"Error listing voices: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
    # Success path: nothing below can raise, so it lives outside the try.
    return payload
|
2024-12-31 10:30:12 -05:00
|
|
|
|
|
|
|
|
|
|
|
@router.post("/audio/voices/combine")
async def combine_voices(
    request: List[str], tts_service: TTSService = Depends(get_tts_service)
):
    """Ask the TTS service to combine several voices.

    Args:
        request: List of voice names, passed to
            ``tts_service.combine_voices`` as ``voices``.
        tts_service: Service that performs the combination and lists voices.

    Returns:
        A dict with two keys: ``"voice"`` is whatever
        ``tts_service.combine_voices`` returned (presumably the new voice's
        name -- confirm against the service), and ``"voices"`` is the voice
        list fetched after combining.

    Raises:
        HTTPException:
            - 400: the service raised ValueError
            - 500: the service raised RuntimeError ("Server error") or any
              other exception ("Unexpected error")
    """
    try:
        # Combine first so the listing fetched afterwards reflects the result.
        new_voice = tts_service.combine_voices(voices=request)
        all_voices = tts_service.list_voices()
        return {"voices": all_voices, "voice": new_voice}

    except ValueError as e:
        logger.error(f"Invalid voice combination request: {str(e)}")
        detail = {"error": "Invalid request", "message": str(e)}
        raise HTTPException(status_code=400, detail=detail)

    except RuntimeError as e:
        logger.error(f"Server error during voice combination: {str(e)}")
        detail = {"error": "Server error", "message": str(e)}
        raise HTTPException(status_code=500, detail=detail)

    except Exception as e:
        logger.error(f"Unexpected error during voice combination: {str(e)}")
        detail = {"error": "Unexpected error", "message": str(e)}
        raise HTTPException(status_code=500, detail=detail)
|