mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
Performance: Adjust session timeout and GPU memory limit; minimize voice pre-caching and improve singleton instance management
This commit is contained in:
parent
ee1f7cde18
commit
20658f9759
4 changed files with 18 additions and 15 deletions
|
@ -8,7 +8,7 @@ class ONNXCPUConfig(BaseModel):
|
||||||
|
|
||||||
# Session pooling
|
# Session pooling
|
||||||
max_instances: int = Field(4, description="Maximum concurrent model instances")
|
max_instances: int = Field(4, description="Maximum concurrent model instances")
|
||||||
instance_timeout: int = Field(300, description="Session timeout in seconds")
|
instance_timeout: int = Field(60, description="Session timeout in seconds")
|
||||||
|
|
||||||
# Runtime settings
|
# Runtime settings
|
||||||
num_threads: int = Field(8, description="Number of threads for parallel operations")
|
num_threads: int = Field(8, description="Number of threads for parallel operations")
|
||||||
|
@ -27,7 +27,7 @@ class ONNXGPUConfig(ONNXCPUConfig):
|
||||||
|
|
||||||
# CUDA settings
|
# CUDA settings
|
||||||
device_id: int = Field(0, description="CUDA device ID")
|
device_id: int = Field(0, description="CUDA device ID")
|
||||||
gpu_mem_limit: float = Field(0.7, description="Fraction of GPU memory to use")
|
gpu_mem_limit: float = Field(0.5, description="Fraction of GPU memory to use")
|
||||||
cudnn_conv_algo_search: str = Field("EXHAUSTIVE", description="CuDNN convolution algorithm search")
|
cudnn_conv_algo_search: str = Field("EXHAUSTIVE", description="CuDNN convolution algorithm search")
|
||||||
|
|
||||||
# Stream management
|
# Stream management
|
||||||
|
|
|
@ -118,14 +118,8 @@ class ModelManager:
|
||||||
# Initialize model with warmup voice
|
# Initialize model with warmup voice
|
||||||
await self.load_model(model_path, warmup_voice, backend_type)
|
await self.load_model(model_path, warmup_voice, backend_type)
|
||||||
|
|
||||||
# Pre-cache common voices in background
|
# Only pre-cache default voice to avoid memory bloat
|
||||||
common_voices = ['af', 'af_bella', 'af_sky', 'af_nicole']
|
logger.info(f"Using {settings.default_voice} as warmup voice")
|
||||||
for voice_name in common_voices:
|
|
||||||
try:
|
|
||||||
await voice_manager.load_voice(voice_name, device=backend.device)
|
|
||||||
logger.debug(f"Pre-cached voice {voice_name}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to pre-cache voice {voice_name}: {e}")
|
|
||||||
|
|
||||||
# Get available voices count
|
# Get available voices count
|
||||||
voices = await voice_manager.list_voices()
|
voices = await voice_manager.list_voices()
|
||||||
|
|
|
@ -195,8 +195,9 @@ class VoiceManager:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Global singleton instance
|
# Global singleton instance and lock
|
||||||
_manager_instance = None
|
_manager_instance = None
|
||||||
|
_manager_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
|
||||||
async def get_manager(config: Optional[VoiceConfig] = None) -> VoiceManager:
|
async def get_manager(config: Optional[VoiceConfig] = None) -> VoiceManager:
|
||||||
|
@ -209,6 +210,14 @@ async def get_manager(config: Optional[VoiceConfig] = None) -> VoiceManager:
|
||||||
VoiceManager instance
|
VoiceManager instance
|
||||||
"""
|
"""
|
||||||
global _manager_instance
|
global _manager_instance
|
||||||
if _manager_instance is None:
|
|
||||||
_manager_instance = VoiceManager(config)
|
# Fast path - return existing instance
|
||||||
return _manager_instance
|
if _manager_instance is not None:
|
||||||
|
return _manager_instance
|
||||||
|
|
||||||
|
# Slow path - create new instance with lock
|
||||||
|
async with _manager_lock:
|
||||||
|
# Double-check pattern
|
||||||
|
if _manager_instance is None:
|
||||||
|
_manager_instance = VoiceManager(config)
|
||||||
|
return _manager_instance
|
|
@ -41,7 +41,7 @@ class KokoroPlayer {
|
||||||
container: this.elements.waveContainer,
|
container: this.elements.waveContainer,
|
||||||
width: this.elements.waveContainer.clientWidth,
|
width: this.elements.waveContainer.clientWidth,
|
||||||
height: 80,
|
height: 80,
|
||||||
style: '"ios9"',
|
style: 'ios9',
|
||||||
// color: '#6366f1',
|
// color: '#6366f1',
|
||||||
speed: 0.02,
|
speed: 0.02,
|
||||||
amplitude: 0.7,
|
amplitude: 0.7,
|
||||||
|
|
Loading…
Add table
Reference in a new issue