Performance: Adjust session timeout and GPU memory limit; minimize voice pre-caching and improve singleton instance management

This commit is contained in:
remsky 2025-01-24 05:01:38 -07:00
parent ee1f7cde18
commit 20658f9759
4 changed files with 18 additions and 15 deletions

View file

@@ -8,7 +8,7 @@ class ONNXCPUConfig(BaseModel):
# Session pooling # Session pooling
max_instances: int = Field(4, description="Maximum concurrent model instances") max_instances: int = Field(4, description="Maximum concurrent model instances")
instance_timeout: int = Field(300, description="Session timeout in seconds") instance_timeout: int = Field(60, description="Session timeout in seconds")
# Runtime settings # Runtime settings
num_threads: int = Field(8, description="Number of threads for parallel operations") num_threads: int = Field(8, description="Number of threads for parallel operations")
@@ -27,7 +27,7 @@ class ONNXGPUConfig(ONNXCPUConfig):
# CUDA settings # CUDA settings
device_id: int = Field(0, description="CUDA device ID") device_id: int = Field(0, description="CUDA device ID")
gpu_mem_limit: float = Field(0.7, description="Fraction of GPU memory to use") gpu_mem_limit: float = Field(0.5, description="Fraction of GPU memory to use")
cudnn_conv_algo_search: str = Field("EXHAUSTIVE", description="CuDNN convolution algorithm search") cudnn_conv_algo_search: str = Field("EXHAUSTIVE", description="CuDNN convolution algorithm search")
# Stream management # Stream management

View file

@@ -118,14 +118,8 @@ class ModelManager:
# Initialize model with warmup voice # Initialize model with warmup voice
await self.load_model(model_path, warmup_voice, backend_type) await self.load_model(model_path, warmup_voice, backend_type)
# Pre-cache common voices in background # Only pre-cache default voice to avoid memory bloat
common_voices = ['af', 'af_bella', 'af_sky', 'af_nicole'] logger.info(f"Using {settings.default_voice} as warmup voice")
for voice_name in common_voices:
try:
await voice_manager.load_voice(voice_name, device=backend.device)
logger.debug(f"Pre-cached voice {voice_name}")
except Exception as e:
logger.warning(f"Failed to pre-cache voice {voice_name}: {e}")
# Get available voices count # Get available voices count
voices = await voice_manager.list_voices() voices = await voice_manager.list_voices()

View file

@@ -195,8 +195,9 @@ class VoiceManager:
} }
# Global singleton instance # Global singleton instance and lock
_manager_instance = None _manager_instance = None
_manager_lock = asyncio.Lock()
async def get_manager(config: Optional[VoiceConfig] = None) -> VoiceManager: async def get_manager(config: Optional[VoiceConfig] = None) -> VoiceManager:
@@ -209,6 +210,14 @@ async def get_manager(config: Optional[VoiceConfig] = None) -> VoiceManager:
VoiceManager instance VoiceManager instance
""" """
global _manager_instance global _manager_instance
if _manager_instance is None:
_manager_instance = VoiceManager(config) # Fast path - return existing instance
return _manager_instance if _manager_instance is not None:
return _manager_instance
# Slow path - create new instance with lock
async with _manager_lock:
# Double-check pattern
if _manager_instance is None:
_manager_instance = VoiceManager(config)
return _manager_instance

View file

@@ -41,7 +41,7 @@ class KokoroPlayer {
container: this.elements.waveContainer, container: this.elements.waveContainer,
width: this.elements.waveContainer.clientWidth, width: this.elements.waveContainer.clientWidth,
height: 80, height: 80,
style: '"ios9"', style: 'ios9',
// color: '#6366f1', // color: '#6366f1',
speed: 0.02, speed: 0.02,
amplitude: 0.7, amplitude: 0.7,