Kokoro-FastAPI/api/src/routers/openai_compatible.py

from typing import List

from loguru import logger
from fastapi import Depends, Response, APIRouter, HTTPException

from ..services.tts_service import TTSService
from ..services.audio import AudioService
from ..structures.schemas import OpenAISpeechRequest

router = APIRouter(
    tags=["OpenAI Compatible TTS"],
    responses={404: {"description": "Not found"}},
)


def get_tts_service() -> TTSService:
    """Provide a TTSService for FastAPI dependency injection.

    A new TTSService is constructed with no arguments on every call.
    """
    service = TTSService()
    return service


@router.post("/audio/speech")
async def create_speech(
    request: OpenAISpeechRequest, tts_service: TTSService = Depends(get_tts_service)
):
    """OpenAI-compatible endpoint for text-to-speech.

    Checks that the requested voice is known to the TTS service, generates
    audio for ``request.input`` and returns it encoded in
    ``request.response_format`` as a downloadable attachment.

    Args:
        request: Speech request; this handler reads its ``input``, ``voice``,
            ``speed`` and ``response_format`` fields.
        tts_service: TTS backend supplied by the ``get_tts_service`` dependency.

    Returns:
        Response whose body is the converted audio, with a matching
        ``Content-Type`` and a ``Content-Disposition`` of
        ``attachment; filename=speech.<format>``.

    Raises:
        HTTPException:
            - 400: a ValueError was raised while handling the request
              (for example, the voice is not in the available list)
            - 500: any other error during generation or conversion
    """
    # Formats whose registered MIME subtype differs from the format name.
    # "audio/mp3" is not a registered media type; MP3 is "audio/mpeg".
    # Any format not listed here keeps the "audio/<format>" fallback.
    media_type_overrides = {"mp3": "audio/mpeg"}

    try:
        # Validate voice exists
        available_voices = tts_service.list_voices()
        if request.voice not in available_voices:
            raise ValueError(
                f"Voice '{request.voice}' not found. Available voices: {', '.join(sorted(available_voices))}"
            )

        # Generate audio directly using TTSService's method; the second
        # element of the returned pair is not needed here.
        audio, _ = tts_service._generate_audio(
            text=request.input,
            voice=request.voice,
            speed=request.speed,
            stitch_long_output=True,
        )

        # Convert to requested format
        # NOTE(review): 24000 is presumably the sample rate in Hz - confirm
        # against AudioService.convert_audio.
        content = AudioService.convert_audio(audio, 24000, request.response_format)

        media_type = media_type_overrides.get(
            request.response_format, f"audio/{request.response_format}"
        )

        return Response(
            content=content,
            media_type=media_type,
            headers={
                "Content-Disposition": f"attachment; filename=speech.{request.response_format}"
            },
        )

    except ValueError as e:
        logger.error(f"Invalid request: {str(e)}")
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(
            status_code=400, detail={"error": "Invalid request", "message": str(e)}
        ) from e
    except Exception as e:
        logger.error(f"Error generating speech: {str(e)}")
        raise HTTPException(
            status_code=500, detail={"error": "Server error", "message": str(e)}
        ) from e


@router.get("/audio/voices")
async def list_voices(tts_service: TTSService = Depends(get_tts_service)):
    """Return the voices reported by the TTS service.

    Args:
        tts_service: TTS backend supplied by the ``get_tts_service`` dependency.

    Returns:
        Dict with a single ``voices`` key holding the result of
        ``tts_service.list_voices()``.

    Raises:
        HTTPException: 500, with the error text as detail, if listing fails.
    """
    try:
        available = tts_service.list_voices()
    except Exception as exc:
        reason = str(exc)
        logger.error(f"Error listing voices: {reason}")
        raise HTTPException(status_code=500, detail=reason)
    return {"voices": available}


@router.post("/audio/voices/combine")
async def combine_voices(
    request: List[str], tts_service: TTSService = Depends(get_tts_service)
):
    """Create a new voice by combining several existing ones.

    Args:
        request: Names of the voices to combine
        tts_service: TTS backend supplied by the ``get_tts_service`` dependency

    Returns:
        Dict with ``voice`` (the value returned by the combine call) and
        ``voices`` (the voice list fetched after combining)

    Raises:
        HTTPException:
            - 400: the service raised ValueError
            - 500: the service raised RuntimeError or any other exception
    """
    try:
        new_voice = tts_service.combine_voices(voices=request)
        # Listed after combining, inside the try so listing failures are
        # reported through the same handlers.
        all_voices = tts_service.list_voices()
    except ValueError as exc:
        reason = str(exc)
        logger.error(f"Invalid voice combination request: {reason}")
        raise HTTPException(
            status_code=400,
            detail={"error": "Invalid request", "message": reason},
        )
    except RuntimeError as exc:
        reason = str(exc)
        logger.error(f"Server error during voice combination: {reason}")
        raise HTTPException(
            status_code=500,
            detail={"error": "Server error", "message": reason},
        )
    except Exception as exc:
        reason = str(exc)
        logger.error(f"Unexpected error during voice combination: {reason}")
        raise HTTPException(
            status_code=500,
            detail={"error": "Unexpected error", "message": reason},
        )

    return {"voices": all_voices, "voice": new_voice}
add ability to combine voices 2024-12-31 10:30:12 -05:00			`from typing import List`

Refactor TTS API and enhance testing setup with coverage and logging improvements 2024-12-31 02:55:51 -07:00			`from loguru import logger`
Ruff Check + Format 2025-01-01 21:50:41 -07:00			`from fastapi import Depends, Response, APIRouter, HTTPException`
Refactor TTS API and enhance testing setup with coverage and logging improvements 2024-12-31 02:55:51 -07:00
WIP, Functional for CPU: Updated for ONNX runtime support, Dockerfile and TTS Service 2025-01-03 00:53:41 -07:00			`from ..services.tts_service import TTSService`
Ruff Check + Format 2025-01-01 21:50:41 -07:00			`from ..services.audio import AudioService`
Refactor TTS API and enhance testing setup with coverage and logging improvements 2024-12-31 02:55:51 -07:00			`from ..structures.schemas import OpenAISpeechRequest`
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00
			`router = APIRouter(`
			`tags=["OpenAI Compatible TTS"],`
			`responses={404: {"description": "Not found"}},`
			`)`

Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`def get_tts_service() -> TTSService:`
			`"""Dependency to get TTSService instance with database session"""`
WIP, Functional for CPU: Updated for ONNX runtime support, Dockerfile and TTS Service 2025-01-03 00:53:41 -07:00			`return TTSService() # Initialize TTSService with default settings`
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00
			`@router.post("/audio/speech")`
			`async def create_speech(`
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00			`request: OpenAISpeechRequest, tts_service: TTSService = Depends(get_tts_service)`
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`):`
			`"""OpenAI-compatible endpoint for text-to-speech"""`
			`try:`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`# Validate voice exists`
			`available_voices = tts_service.list_voices()`
			`if request.voice not in available_voices:`
			`raise ValueError(`
			`f"Voice '{request.voice}' not found. Available voices: {', '.join(sorted(available_voices))}"`
			`)`
Ruff Check + Format 2025-01-01 21:50:41 -07:00
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`# Generate audio directly using TTSService's method`
			`audio, _ = tts_service._generate_audio(`
			`text=request.input,`
			`voice=request.voice,`
			`speed=request.speed,`
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00			`stitch_long_output=True,`
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`)`
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`# Convert to requested format`
			`content = AudioService.convert_audio(audio, 24000, request.response_format)`
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`return Response(`
			`content=content,`
			`media_type=f"audio/{request.response_format}",`
			`headers={`
			`"Content-Disposition": f"attachment; filename=speech.{request.response_format}"`
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00			`},`
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`)`
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`except ValueError as e:`
			`logger.error(f"Invalid request: {str(e)}")`
			`raise HTTPException(`
Ruff Check + Format 2025-01-01 21:50:41 -07:00			`status_code=400, detail={"error": "Invalid request", "message": str(e)}`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`)`
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`except Exception as e:`
			`logger.error(f"Error generating speech: {str(e)}")`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`raise HTTPException(`
Ruff Check + Format 2025-01-01 21:50:41 -07:00			`status_code=500, detail={"error": "Server error", "message": str(e)}`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`)`
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`@router.get("/audio/voices")`
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00			`async def list_voices(tts_service: TTSService = Depends(get_tts_service)):`
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`"""List all available voices for text-to-speech"""`
			`try:`
			`voices = tts_service.list_voices()`
			`return {"voices": voices}`
			`except Exception as e:`
			`logger.error(f"Error listing voices: {str(e)}")`
			`raise HTTPException(status_code=500, detail=str(e))`
add ability to combine voices 2024-12-31 10:30:12 -05:00

			`@router.post("/audio/voices/combine")`
Ruff Check + Format 2025-01-01 21:50:41 -07:00			`async def combine_voices(`
			`request: List[str], tts_service: TTSService = Depends(get_tts_service)`
			`):`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`"""Combine multiple voices into a new voice.`
Ruff Check + Format 2025-01-01 21:50:41 -07:00
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`Args:`
			`request: List of voice names to combine`
Ruff Check + Format 2025-01-01 21:50:41 -07:00
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`Returns:`
			`Dict with combined voice name and list of all available voices`
Ruff Check + Format 2025-01-01 21:50:41 -07:00
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`Raises:`
Ruff Check + Format 2025-01-01 21:50:41 -07:00			`HTTPException:`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`- 400: Invalid request (wrong number of voices, voice not found)`
			`- 500: Server error (file system issues, combination failed)`
			`"""`
add ability to combine voices 2024-12-31 10:30:12 -05:00			`try:`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`combined_voice = tts_service.combine_voices(voices=request)`
add ability to combine voices 2024-12-31 10:30:12 -05:00			`voices = tts_service.list_voices()`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`return {"voices": voices, "voice": combined_voice}`
Ruff Check + Format 2025-01-01 21:50:41 -07:00
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`except ValueError as e:`
			`logger.error(f"Invalid voice combination request: {str(e)}")`
			`raise HTTPException(`
Ruff Check + Format 2025-01-01 21:50:41 -07:00			`status_code=400, detail={"error": "Invalid request", "message": str(e)}`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`)`
Ruff Check + Format 2025-01-01 21:50:41 -07:00
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`except RuntimeError as e:`
			`logger.error(f"Server error during voice combination: {str(e)}")`
			`raise HTTPException(`
Ruff Check + Format 2025-01-01 21:50:41 -07:00			`status_code=500, detail={"error": "Server error", "message": str(e)}`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`)`
Ruff Check + Format 2025-01-01 21:50:41 -07:00
add ability to combine voices 2024-12-31 10:30:12 -05:00			`except Exception as e:`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`logger.error(f"Unexpected error during voice combination: {str(e)}")`
			`raise HTTPException(`
Ruff Check + Format 2025-01-01 21:50:41 -07:00			`status_code=500, detail={"error": "Unexpected error", "message": str(e)}`
- modified voice loading to copy on init - adjustments to the combine voices functionality - error handling and analysis 2024-12-31 18:55:26 -07:00			`)`