Kokoro-FastAPI/api/src/main.py

"""
FastAPI OpenAI Compatible API
"""

import os
import secrets
import sys
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Optional

import torch
import uvicorn
from fastapi import Depends, FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from loguru import logger

from .core.config import settings
from .routers.debug import router as debug_router
from .routers.development import router as dev_router
from .routers.openai_compatible import router as openai_router
from .routers.web_player import router as web_router


def setup_logger():
    """Install a single colorized stdout handler on the loguru logger.

    Every previously registered handler is removed first, so after this call
    the stdout sink configured here (at DEBUG level) is the only one. The
    ERROR level is additionally recolored red.
    """
    # Columns of one log line, joined with " | " separators.
    timestamp_col = "<fg #2E8B57>{time:hh:mm:ss A}</fg #2E8B57>"
    level_col = "{level: <8}"
    origin_col = "<fg #4169E1>{module}:{line}</fg #4169E1>"
    line_format = " | ".join((timestamp_col, level_col, origin_col, "{message}"))

    stdout_handler = dict(
        sink=sys.stdout,
        format=line_format,
        colorize=True,
        level="DEBUG",
    )

    logger.remove()
    logger.configure(handlers=[stdout_handler])
    logger.level("ERROR", color="<red>")


# Apply the logging setup at import time so later module-level code logs
# through the custom handler.
setup_logger()

# auto_error=False makes the scheme hand ``None`` to the dependency instead of
# raising 401 itself when a request has no Basic "Authorization" header. With
# the default (auto_error=True) such requests were rejected before
# get_http_credentials ran, even when authentication was not configured.
security = HTTPBasic(auto_error=False)


def get_http_credentials(
    credentials: Optional[HTTPBasicCredentials] = Depends(security),
):
    """Conditionally verify HTTP Basic Auth credentials.

    Authentication is enforced only when both the ``HTTP_USERNAME`` and
    ``HTTP_PASSWORD`` environment variables are set to non-empty values;
    otherwise every request is let through.

    Args:
        credentials: Basic credentials parsed from the request, or ``None``
            when the request carried no Basic ``Authorization`` header.

    Returns:
        ``None`` when authentication is not configured, otherwise the
        authenticated username.

    Raises:
        HTTPException: 401 (with a ``WWW-Authenticate: Basic`` challenge) when
            authentication is configured and the credentials are missing or
            do not match.
    """
    username = os.getenv("HTTP_USERNAME")
    password = os.getenv("HTTP_PASSWORD")

    # Skip authentication if credentials not configured
    if not username or not password:
        return None

    if credentials is None:
        raise HTTPException(
            status_code=401,
            detail="Not authenticated",
            headers={"WWW-Authenticate": "Basic"},
        )

    # Constant-time comparisons so response timing does not leak how much of
    # a guess was correct. Bytes are compared because compare_digest rejects
    # non-ASCII str input. Both checks are always evaluated (no short-circuit).
    username_ok = secrets.compare_digest(
        credentials.username.encode("utf-8"), username.encode("utf-8")
    )
    password_ok = secrets.compare_digest(
        credentials.password.encode("utf-8"), password.encode("utf-8")
    )
    if not (username_ok and password_ok):
        raise HTTPException(
            status_code=401,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Basic"},
        )
    return credentials.username


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: prepare the TTS stack before serving.

    Startup sequence: remove old temp files, obtain the model and voice
    managers, initialize the model with warmup, then log a startup banner
    summarizing the device, model, voice pack count and web player URLs.
    Any initialization failure is logged and re-raised, aborting startup.
    Nothing is executed after the ``yield`` (no shutdown work).
    """
    # Imported lazily, inside the hook, rather than at module import time.
    from .inference.model_manager import get_manager
    from .inference.voice_manager import get_manager as get_voice_manager
    from .services.temp_manager import cleanup_temp_files

    # Clear out old temp files before anything else
    await cleanup_temp_files()

    logger.info("Loading TTS model and voice packs...")

    try:
        models = await get_manager()
        voices = await get_voice_manager()

        # Warm the model up and collect the status values for the banner
        device, model, voicepack_count = await models.initialize_with_warmup(voices)
    except Exception as e:
        logger.error(f"Failed to initialize model: {e}")
        raise

    boundary = "░" * 24
    banner = f"""

{boundary}

    ╔═╗┌─┐┌─┐┌┬┐
    ╠╣ ├─┤└─┐ │ 
    ╚  ┴ ┴└─┘ ┴
    ╦╔═┌─┐┬┌─┌─┐
    ╠╩╗│ │├┴┐│ │
    ╩ ╩└─┘┴ ┴└─┘

{boundary}
                """

    # Collect the message fragments in order and join them once at the end.
    fragments = [banner, f"\nModel warmed up on {device}: {model}"]

    if device == "mps":
        fragments.append("\nUsing Apple Metal Performance Shaders (MPS)")
    elif device == "cuda":
        fragments.append(f"\nCUDA: {torch.cuda.is_available()}")
    else:
        fragments.append("\nRunning on CPU")

    fragments.append(f"\n{voicepack_count} voice packs loaded")

    # Web player section: advertise the URLs, or note that it is off
    if not settings.enable_web_player:
        fragments.append("\n\nWeb Player: disabled")
    else:
        fragments.append(
            f"\n\nBeta Web Player: http://{settings.host}:{settings.port}/web/"
        )
        fragments.append(f"\nor http://localhost:{settings.port}/web/")

    fragments.append(f"\n{boundary}\n")
    logger.info("".join(fragments))

    yield


# Create the FastAPI application; metadata comes from settings and the
# lifespan hook performs model initialization at startup.
_app_options = {
    "title": settings.api_title,
    "description": settings.api_description,
    "version": settings.api_version,
    "lifespan": lifespan,
    "openapi_url": "/openapi.json",  # Explicitly enable OpenAPI schema
}
app = FastAPI(**_app_options)

# CORS is opt-in: the middleware is only installed when settings enable it.
# Origins come from settings; all methods and headers are allowed.
if settings.cors_enabled:
    _cors_options = {
        "allow_origins": settings.cors_origins,
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    app.add_middleware(CORSMiddleware, **_cors_options)

# Register routers in a fixed order; each one is guarded by the conditional
# HTTP Basic Auth dependency.
_router_mounts = [
    (openai_router, "/v1"),
    (dev_router, ""),  # Development endpoints
    (debug_router, ""),  # Debug endpoints
]
if settings.enable_web_player:
    _router_mounts.append((web_router, "/web"))  # Web player static files

for _router, _prefix in _router_mounts:
    app.include_router(
        _router,
        prefix=_prefix,
        dependencies=[Depends(get_http_credentials)],
    )


# Liveness probe, registered directly on the app (not on an auth-guarded router)
@app.get("/health")
async def health_check():
    """Report service liveness as a fixed JSON payload."""
    payload = dict(status="healthy")
    return payload


@app.get("/v1/test")
async def test_endpoint():
    """Return a fixed payload so callers can confirm /v1 routing works."""
    payload = dict(status="ok")
    return payload


# Direct-execution entry point: serve the app with uvicorn, auto-reload on.
if __name__ == "__main__":
    _server_options = {
        "host": settings.host,
        "port": settings.port,
        "reload": True,
    }
    uvicorn.run("api.src.main:app", **_server_options)
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`"""`
			`FastAPI OpenAI Compatible API`
			`"""`
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
Refactor model loading and configuration: update, adjust model loading device,. add async streaming examples and remove unused warmup service. 2025-01-22 02:33:29 -07:00			`import os`
- Added GenerateFromPhonemesRequest model to text_schemas.py - Refactored TTS model initialization methods in tts_gpu.py and tts_cpu.py - Added custom logger configuration in main.py - Deprecated text_processing router -> development route 2025-01-09 07:20:14 -07:00			`import sys`
Ruff format + fix 2025-01-09 18:41:44 -07:00			`from contextlib import asynccontextmanager`
WIP: v1_0_0 migration 2025-01-28 13:52:57 -07:00			`from pathlib import Path`
Refactor TTS API and enhance testing setup with coverage and logging improvements 2024-12-31 02:55:51 -07:00
Refactor model loading and configuration: update, adjust model loading device,. add async streaming examples and remove unused warmup service. 2025-01-22 02:33:29 -07:00			`import torch`
Refactor TTS API and enhance testing setup with coverage and logging improvements 2024-12-31 02:55:51 -07:00			`import uvicorn`
added basic authentication, to enable it, set HTTP_USERNAME and HTTP_PASSWORD in environment variables 2025-03-04 14:14:04 +08:00			`from fastapi import Depends, FastAPI, HTTPException`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`from fastapi.middleware.cors import CORSMiddleware`
added basic authentication, to enable it, set HTTP_USERNAME and HTTP_PASSWORD in environment variables 2025-03-04 14:14:04 +08:00			`from fastapi.security import HTTPBasic, HTTPBasicCredentials`
Ruff checks, ci fix 2025-01-13 20:15:46 -07:00			`from loguru import logger`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00
			`from .core.config import settings`
Ruff check + formatting 2025-02-09 18:32:17 -07:00			`from .routers.debug import router as debug_router`
Ruff format + fix 2025-01-09 18:41:44 -07:00			`from .routers.development import router as dev_router`
Refactor TTS API and enhance testing setup with coverage and logging improvements 2024-12-31 02:55:51 -07:00			`from .routers.openai_compatible import router as openai_router`
Ruff check + formatting 2025-02-09 18:32:17 -07:00			`from .routers.web_player import router as web_router`
- Added GenerateFromPhonemesRequest model to text_schemas.py - Refactored TTS model initialization methods in tts_gpu.py and tts_cpu.py - Added custom logger configuration in main.py - Deprecated text_processing router -> development route 2025-01-09 07:20:14 -07:00

			`def setup_logger():`
			`"""Configure loguru logger with custom formatting"""`
			`config = {`
			`"handlers": [`
			`{`
			`"sink": sys.stdout,`
			`"format": "<fg #2E8B57>{time:hh:mm:ss A}</fg #2E8B57> \| "`
Ruff format + fix 2025-01-09 18:41:44 -07:00			`"{level: <8} \| "`
-fix voice selection not matching language phonemes -added voice language override parameter 2025-02-08 01:29:15 -07:00			`"<fg #4169E1>{module}:{line}</fg #4169E1> \| "`
Ruff format + fix 2025-01-09 18:41:44 -07:00			`"{message}",`
- Added GenerateFromPhonemesRequest model to text_schemas.py - Refactored TTS model initialization methods in tts_gpu.py and tts_cpu.py - Added custom logger configuration in main.py - Deprecated text_processing router -> development route 2025-01-09 07:20:14 -07:00			`"colorize": True,`
v1_0 multilanguage initial support -note: all languages currently installed, selected by prefix of first chosen voice in call 2025-02-03 03:33:12 -07:00			`"level": "DEBUG",`
- Added GenerateFromPhonemesRequest model to text_schemas.py - Refactored TTS model initialization methods in tts_gpu.py and tts_cpu.py - Added custom logger configuration in main.py - Deprecated text_processing router -> development route 2025-01-09 07:20:14 -07:00			`},`
			`],`
			`}`
			`logger.remove()`
			`logger.configure(**config)`
			`logger.level("ERROR", color="<red>")`


			`# Configure logger`
			`setup_logger()`
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00
added basic authentication, to enable it, set HTTP_USERNAME and HTTP_PASSWORD in environment variables 2025-03-04 14:14:04 +08:00			`security = HTTPBasic()`

			`def get_http_credentials(credentials: HTTPBasicCredentials = Depends(security)):`
			`"""Conditionally verify HTTP Basic Auth credentials"""`
			`username = os.getenv("HTTP_USERNAME")`
			`password = os.getenv("HTTP_PASSWORD")`

			`# Skip authentication if credentials not configured`
			`if not username or not password:`
			`return`

			`# Perform authentication check if credentials are configured`
			`if (credentials.username != username or credentials.password != password):`
			`raise HTTPException(`
			`status_code=401,`
			`detail="Incorrect username or password",`
			`headers={"WWW-Authenticate": "Basic"},`
			`)`
			`return credentials.username`


Ruff check + formatting 2025-02-09 18:32:17 -07:00
- SQLAlchemy integration for TTS queue management - Model pre-loading and database initialization in the FastAPI app lifespan. 2024-12-30 13:21:17 -07:00			`@asynccontextmanager`
			`async def lifespan(app: FastAPI):`
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00			`"""Lifespan context manager for model initialization"""`
Refactor model loading and configuration: update, adjust model loading device,. add async streaming examples and remove unused warmup service. 2025-01-22 02:33:29 -07:00			`from .inference.model_manager import get_manager`
			`from .inference.voice_manager import get_manager as get_voice_manager`
Add clear text button and enhance temporary file management - Introduced a "Clear Text" button in the web interface for user convenience. - Updated temporary file management settings in the configuration. - Added new debug endpoints for system and storage information. - Improved logging levels for better debugging insights. 2025-01-29 18:29:02 -07:00			`from .services.temp_manager import cleanup_temp_files`
Implement temporary file management on openai endpoint, whole file downloads 2025-01-29 04:09:38 -07:00
			`# Clean old temp files on startup`
			`await cleanup_temp_files()`
Refactor model loading and configuration: update, adjust model loading device,. add async streaming examples and remove unused warmup service. 2025-01-22 02:33:29 -07:00
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00			`logger.info("Loading TTS model and voice packs...")`

Refactor model loading and configuration: update, adjust model loading device,. add async streaming examples and remove unused warmup service. 2025-01-22 02:33:29 -07:00			`try:`
Initial commit of Kokoro V1.0-only integration 2025-02-04 03:37:56 -07:00			`# Initialize managers`
Refactor model loading and configuration: update, adjust model loading device,. add async streaming examples and remove unused warmup service. 2025-01-22 02:33:29 -07:00			`model_manager = await get_manager()`
			`voice_manager = await get_voice_manager()`

-Adjust testing framework for new model -Add web player support: include static file serving and HTML interface for TTS 2025-01-22 21:11:47 -07:00			`# Initialize model with warmup and get status`
Ruff check + formatting 2025-02-09 18:32:17 -07:00			`device, model, voicepack_count = await model_manager.initialize_with_warmup(`
			`voice_manager`
			`)`
WIP: v1_0_0 migration 2025-01-28 13:52:57 -07:00
Refactor model loading and configuration: update, adjust model loading device,. add async streaming examples and remove unused warmup service. 2025-01-22 02:33:29 -07:00			`except Exception as e:`
			`logger.error(f"Failed to initialize model: {e}")`
			`raise`
Ruff check + formatting 2025-02-09 18:32:17 -07:00
			`boundary = "░" * 2 * 12`
Ruff format + fix 2025-01-09 18:41:44 -07:00			`startup_msg = f"""`
-update soundfile version -alignment with streaming standards -audio processing config settings -more comprehensive model warmup -minor model improvements -enhancing testing, benchmarking -cool ascii logo 2025-01-06 03:32:41 -07:00
WIP: open ai compatible streaming 2025-01-04 17:55:36 -07:00			`{boundary}`
Swapped generator to preprocessing 2025-01-04 22:23:59 -07:00
			`╔═╗┌─┐┌─┐┌┬┐`
WIP: v1_0_0 migration 2025-01-28 13:52:57 -07:00			`╠╣ ├─┤└─┐ │`
Enhance model inference: update documentation, add model download scripts for PyTorch and ONNX, and refactor configuration handling 2025-01-21 21:44:21 -07:00			`╚ ┴ ┴└─┘ ┴`
Swapped generator to preprocessing 2025-01-04 22:23:59 -07:00			`╦╔═┌─┐┬┌─┌─┐`
			`╠╩╗│ │├┴┐│ │`
			`╩ ╩└─┘┴ ┴└─┘`
WIP: open ai compatible streaming 2025-01-04 17:55:36 -07:00
			`{boundary}`
			`"""`
Enhance model inference: update documentation, add model download scripts for PyTorch and ONNX, and refactor configuration handling 2025-01-21 21:44:21 -07:00			`startup_msg += f"\nModel warmed up on {device}: {model}"`
added support for mps on mac with apple silicon 2025-03-10 11:58:45 +11:00			`if device == "mps":`
			`startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"`
			`elif device == "cuda":`
			`startup_msg += f"\nCUDA: {torch.cuda.is_available()}"`
			`else:`
			`startup_msg += "\nRunning on CPU"`
-Adjust testing framework for new model -Add web player support: include static file serving and HTML interface for TTS 2025-01-22 21:11:47 -07:00			`startup_msg += f"\n{voicepack_count} voice packs loaded"`
Ruff check + formatting 2025-02-09 18:32:17 -07:00
-Adjust testing framework for new model -Add web player support: include static file serving and HTML interface for TTS 2025-01-22 21:11:47 -07:00			`# Add web player info if enabled`
			`if settings.enable_web_player:`
Ruff check + formatting 2025-02-09 18:32:17 -07:00			`startup_msg += (`
			`f"\n\nBeta Web Player: http://{settings.host}:{settings.port}/web/"`
			`)`
-fix voice selection not matching language phonemes -added voice language override parameter 2025-02-08 01:29:15 -07:00			`startup_msg += f"\nor http://localhost:{settings.port}/web/"`
-Adjust testing framework for new model -Add web player support: include static file serving and HTML interface for TTS 2025-01-22 21:11:47 -07:00			`else:`
			`startup_msg += "\n\nWeb Player: disabled"`
Ruff check + formatting 2025-02-09 18:32:17 -07:00
WIP: open ai compatible streaming 2025-01-04 17:55:36 -07:00			`startup_msg += f"\n{boundary}\n"`
			`logger.info(startup_msg)`

- SQLAlchemy integration for TTS queue management - Model pre-loading and database initialization in the FastAPI app lifespan. 2024-12-30 13:21:17 -07:00			`yield`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`# Initialize FastAPI app`
			`app = FastAPI(`
			`title=settings.api_title,`
			`description=settings.api_description,`
			`version=settings.api_version,`
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`lifespan=lifespan,`
			`openapi_url="/openapi.json", # Explicitly enable OpenAPI schema`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`)`

-Adjust testing framework for new model -Add web player support: include static file serving and HTML interface for TTS 2025-01-22 21:11:47 -07:00			`# Add CORS middleware if enabled`
			`if settings.cors_enabled:`
			`app.add_middleware(`
			`CORSMiddleware,`
			`allow_origins=settings.cors_origins,`
			`allow_credentials=True,`
			`allow_methods=["*"],`
			`allow_headers=["*"],`
			`)`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00
- CPU ONNX + PyTorch CUDA, functional - Incorporated text processing module as service, towards modularization and optimizations - Added text processing router for phonemization - Enhanced benchmark statistics with real-time speed metrics 2025-01-03 17:54:17 -07:00			`# Include routers`
added basic authentication, to enable it, set HTTP_USERNAME and HTTP_PASSWORD in environment variables 2025-03-04 14:14:04 +08:00			`app.include_router(openai_router, prefix="/v1", dependencies=[Depends(get_http_credentials)])`
			`app.include_router(dev_router, dependencies=[Depends(get_http_credentials)]) # Development endpoints`
			`app.include_router(debug_router, dependencies=[Depends(get_http_credentials)]) # Debug endpoints`
-Adjust testing framework for new model -Add web player support: include static file serving and HTML interface for TTS 2025-01-22 21:11:47 -07:00			`if settings.enable_web_player:`
added basic authentication, to enable it, set HTTP_USERNAME and HTTP_PASSWORD in environment variables 2025-03-04 14:14:04 +08:00			`app.include_router(web_router, prefix="/web", dependencies=[Depends(get_http_credentials)]) # Web player static files`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`# Health check endpoint`
			`@app.get("/health")`
			`async def health_check():`
			`"""Health check endpoint"""`
			`return {"status": "healthy"}`

Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
- Complete TTS endpoint replacement with OpenAI compatible -Removed output directory, and update configuration settings - Added benchmarking for entire novel 2024-12-31 01:52:16 -07:00			`@app.get("/v1/test")`
			`async def test_endpoint():`
			`"""Test endpoint to verify routing"""`
			`return {"status": "ok"}`
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00
Enhance TTS API with logging, voice pack loading, and schema updates 2024-12-31 01:57:00 -07:00
Add initial implementation of Kokoro TTS API with Docker GPU support - Set up FastAPI application with TTS service - Define API endpoints for TTS generation and voice listing - Implement Pydantic models for request and response schemas - Add Dockerfile and docker-compose.yml for containerization - Include example usage and benchmark results in README 2024-12-30 04:17:50 -07:00			`if __name__ == "__main__":`
			`uvicorn.run("api.src.main:app", host=settings.host, port=settings.port, reload=True)`