mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
Performance: Adjust session timeout and GPU memory limit; minimize voice pre-caching and improve singleton instance management
This commit is contained in:
parent
ee1f7cde18
commit
20658f9759
4 changed files with 18 additions and 15 deletions
|
@ -8,7 +8,7 @@ class ONNXCPUConfig(BaseModel):
|
|||
|
||||
# Session pooling
|
||||
max_instances: int = Field(4, description="Maximum concurrent model instances")
|
||||
instance_timeout: int = Field(300, description="Session timeout in seconds")
|
||||
instance_timeout: int = Field(60, description="Session timeout in seconds")
|
||||
|
||||
# Runtime settings
|
||||
num_threads: int = Field(8, description="Number of threads for parallel operations")
|
||||
|
@ -27,7 +27,7 @@ class ONNXGPUConfig(ONNXCPUConfig):
|
|||
|
||||
# CUDA settings
|
||||
device_id: int = Field(0, description="CUDA device ID")
|
||||
gpu_mem_limit: float = Field(0.7, description="Fraction of GPU memory to use")
|
||||
gpu_mem_limit: float = Field(0.5, description="Fraction of GPU memory to use")
|
||||
cudnn_conv_algo_search: str = Field("EXHAUSTIVE", description="CuDNN convolution algorithm search")
|
||||
|
||||
# Stream management
|
||||
|
|
|
@ -118,14 +118,8 @@ class ModelManager:
|
|||
# Initialize model with warmup voice
|
||||
await self.load_model(model_path, warmup_voice, backend_type)
|
||||
|
||||
# Pre-cache common voices in background
|
||||
common_voices = ['af', 'af_bella', 'af_sky', 'af_nicole']
|
||||
for voice_name in common_voices:
|
||||
try:
|
||||
await voice_manager.load_voice(voice_name, device=backend.device)
|
||||
logger.debug(f"Pre-cached voice {voice_name}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to pre-cache voice {voice_name}: {e}")
|
||||
# Only pre-cache default voice to avoid memory bloat
|
||||
logger.info(f"Using {settings.default_voice} as warmup voice")
|
||||
|
||||
# Get available voices count
|
||||
voices = await voice_manager.list_voices()
|
||||
|
|
|
@ -195,8 +195,9 @@ class VoiceManager:
|
|||
}
|
||||
|
||||
|
||||
# Global singleton instance
|
||||
# Global singleton instance and lock
|
||||
_manager_instance = None
|
||||
_manager_lock = asyncio.Lock()
|
||||
|
||||
|
||||
async def get_manager(config: Optional[VoiceConfig] = None) -> VoiceManager:
|
||||
|
@ -209,6 +210,14 @@ async def get_manager(config: Optional[VoiceConfig] = None) -> VoiceManager:
|
|||
VoiceManager instance
|
||||
"""
|
||||
global _manager_instance
|
||||
|
||||
# Fast path - return existing instance
|
||||
if _manager_instance is not None:
|
||||
return _manager_instance
|
||||
|
||||
# Slow path - create new instance with lock
|
||||
async with _manager_lock:
|
||||
# Double-check pattern
|
||||
if _manager_instance is None:
|
||||
_manager_instance = VoiceManager(config)
|
||||
return _manager_instance
|
|
@ -41,7 +41,7 @@ class KokoroPlayer {
|
|||
container: this.elements.waveContainer,
|
||||
width: this.elements.waveContainer.clientWidth,
|
||||
height: 80,
|
||||
style: '"ios9"',
|
||||
style: 'ios9',
|
||||
// color: '#6366f1',
|
||||
speed: 0.02,
|
||||
amplitude: 0.7,
|
||||
|
|
Loading…
Add table
Reference in a new issue