# Kokoro-FastAPI/api/src/main.py

"""
FastAPI OpenAI Compatible API
"""

import os
import sys
from contextlib import asynccontextmanager

import torch
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from loguru import logger

from .core.config import settings
from .routers.development import router as dev_router
from .routers.openai_compatible import router as openai_router
from .services.tts_service import TTSService


def setup_logger():
    """Configure loguru logger with custom formatting"""
    config = {
        "handlers": [
            {
                "sink": sys.stdout,
                "format": "<fg #2E8B57>{time:hh:mm:ss A}</fg #2E8B57> | "
                "{level: <8} | "
                "{message}",
                "colorize": True,
                "level": "INFO",
            },
        ],
    }
    logger.remove()
    logger.configure(**config)
    logger.level("ERROR", color="<red>")


# Configure logger
setup_logger()
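# (Configured at import time, so the startup logs emitted in lifespan below
# already go through the custom sink and format.)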


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan context manager for model initialization"""
    from .inference.model_manager import get_manager
    from .inference.voice_manager import get_manager as get_voice_manager
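    # Deferred imports: the inference stack is pulled in only when the app
    # actually starts, which keeps plain imports of this module cheap.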
logger.info("Loading TTS model and voice packs...")
try:
# Initialize managers globally
model_manager = await get_manager()
voice_manager = await get_voice_manager()

        # Determine backend type based on settings
        if settings.use_gpu and torch.cuda.is_available():
            backend_type = "onnx_gpu" if settings.use_onnx else "pytorch_gpu"
        else:
            backend_type = "onnx_cpu" if settings.use_onnx else "pytorch_cpu"
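        # Four combinations are possible: {pytorch, onnx} x {gpu, cpu}. Note
        # that the GPU path also requires CUDA to be available at runtime.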

        # Get backend and initialize model
        backend = model_manager.get_backend(backend_type)

        # Use model path directly from settings
        model_file = settings.onnx_model_file if settings.use_onnx else settings.pytorch_model_file
        model_path = os.path.join(settings.model_dir, model_file)
        if not os.path.exists(model_path):
            raise RuntimeError(f"Model file not found: {model_path}")
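        # Raising here aborts startup entirely, which is preferable to coming
        # up "healthy" with no model and failing every synthesis request.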

        # Pre-cache the default voice and use it for warmup
        warmup_voice = await voice_manager.load_voice(settings.default_voice, device=backend.device)
        logger.info(f"Pre-cached voice {settings.default_voice} for warmup")

        # Initialize model with the warmup voice
        await model_manager.load_model(model_path, warmup_voice, backend_type)
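        # Passing a voice here presumably lets load_model run a short warmup
        # synthesis, so the first real request doesn't pay the cold-start cost.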

        # Pre-cache common voices up front; failures are non-fatal, so a
        # deployment shipping only a subset of voicepacks still boots.
        common_voices = ["af", "af_bella", "af_sarah", "af_nicole"]
        for voice_name in common_voices:
            try:
                await voice_manager.load_voice(voice_name, device=backend.device)
                logger.debug(f"Pre-cached voice {voice_name}")
            except Exception as e:
                logger.warning(f"Failed to pre-cache voice {voice_name}: {e}")

        # Get available voices for startup message
        voices = await voice_manager.list_voices()
        voicepack_count = len(voices)

        # Get device info for startup message
        device = "GPU" if settings.use_gpu else "CPU"
        model = "ONNX" if settings.use_onnx else "PyTorch"
    except Exception as e:
        logger.error(f"Failed to initialize model: {e}")
        raise

    boundary = "░" * 2 * 12
    startup_msg = f"""
{boundary}

    Kokoro-FastAPI

{boundary}
"""
    startup_msg += f"\nModel warmed up on {device}: {model}"
    startup_msg += f"\n{voicepack_count} voice packs loaded\n"
    startup_msg += f"\n{boundary}\n"
    logger.info(startup_msg)

    yield
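    # Anything placed after the yield would run on shutdown; there is
    # currently nothing to clean up.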


# Initialize FastAPI app
app = FastAPI(
    title=settings.api_title,
    description=settings.api_description,
    version=settings.api_version,
    lifespan=lifespan,
    openapi_url="/openapi.json",  # Explicitly enable OpenAPI schema
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
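# Note: wildcard origins combined with allow_credentials=True is as permissive
# as CORS gets; consider pinning allow_origins for production deployments.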

# Include routers
app.include_router(openai_router, prefix="/v1")
app.include_router(dev_router)  # New development endpoints
# app.include_router(text_router)  # Deprecated but still live for backwards compatibility


# Health check endpoint
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    return {"status": "healthy"}
@app.get("/v1/test")
async def test_endpoint():
"""Test endpoint to verify routing"""
return {"status": "ok"}


if __name__ == "__main__":
    uvicorn.run("api.src.main:app", host=settings.host, port=settings.port, reload=True)
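# Run from the repo root with `python -m api.src.main`, or point uvicorn at
# the app directly: `uvicorn api.src.main:app` (host/port as in settings).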