This commit is contained in:
chai51 2025-03-16 20:47:27 +08:00 committed by GitHub
commit 19fdff3431
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 25 additions and 5 deletions

View file

@ -18,6 +18,8 @@ class Settings(BaseSettings):
allow_local_voice_saving: bool = (
False # Whether to allow saving combined voices locally
)
repo_id: str = "hexgrad/Kokoro-82M"
kokoro_v1_file: str = "v1_0/kokoro-v1_0.pth"
# Container absolute paths
model_dir: str = "/app/api/src/models" # Absolute path in container

View file

@ -6,6 +6,7 @@ this module focuses on memory management and model file paths.
"""
from pydantic import BaseModel, Field
from core.config import settings
class KokoroV1Config(BaseModel):
@ -36,7 +37,7 @@ class ModelConfig(BaseModel):
# Model filename
pytorch_kokoro_v1_file: str = Field(
"v1_0/kokoro-v1_0.pth", description="PyTorch Kokoro V1 model filename"
settings.kokoro_v1_file, description="PyTorch Kokoro V1 model filename"
)
# Backend config

View file

@ -47,7 +47,7 @@ class KokoroV1(BaseModelBackend):
logger.info(f"Model path: {model_path}")
# Load model and let KModel handle device mapping
self._model = KModel(config=config_path, model=model_path).eval()
self._model = KModel(config=config_path, model=model_path, repo_id=settings.repo_id).eval()
# Move to CUDA if needed
if self._device == "cuda":
self._model = self._model.cuda()
@ -57,6 +57,9 @@ class KokoroV1(BaseModelBackend):
except Exception as e:
raise RuntimeError(f"Failed to load Kokoro model: {e}")
def en_callable(self, text: str) -> str:
    """Return the phonemes the English pipeline produces for ``text``.

    Passed as the ``en_callable`` argument when a pipeline is built in
    ``_get_pipeline``. Only the first result yielded by the English ('a')
    pipeline is used.

    Args:
        text: Text to run through the English pipeline.

    Returns:
        The ``phonemes`` attribute of the first result, or an empty string
        when the pipeline yields no result for ``text``.

    Raises:
        KeyError: If the English ('a') pipeline has not been created yet.
    """
    # A bare next() would raise StopIteration on an empty result stream;
    # if that escapes inside a generator it surfaces as RuntimeError (PEP 479).
    first = next(self._pipelines['a'](text), None)
    return "" if first is None else first.phonemes
def _get_pipeline(self, lang_code: str) -> KPipeline:
"""Get or create pipeline for language code.
@ -69,10 +72,19 @@ class KokoroV1(BaseModelBackend):
if not self._model:
raise RuntimeError("Model not loaded")
# Chinese ('z') text may contain embedded English, so make sure the English ('a')
# pipeline exists first; en_callable relies on it to phonemize the English parts.
if 'a' not in self._pipelines and lang_code == 'z':
lang_en = 'a'
logger.info(f"Creating new pipeline for language code: {lang_en}")
self._pipelines[lang_en] = KPipeline(
lang_code=lang_en, model=False, repo_id=settings.repo_id
)
if lang_code not in self._pipelines:
logger.info(f"Creating new pipeline for language code: {lang_code}")
self._pipelines[lang_code] = KPipeline(
lang_code=lang_code, model=self._model, device=self._device
lang_code=lang_code, model=self._model, device=self._device, repo_id=settings.repo_id,
en_callable=self.en_callable
)
return self._pipelines[lang_code]

View file

@ -31,8 +31,8 @@ dependencies = [
"matplotlib>=3.10.0",
"mutagen>=1.47.0",
"psutil>=6.1.1",
"kokoro @ git+https://github.com/hexgrad/kokoro.git@31a2b6337b8c1b1418ef68c48142328f640da938",
'misaki[en,ja,ko,zh] @ git+https://github.com/hexgrad/misaki.git@ebc76c21b66c5fc4866ed0ec234047177b396170',
"kokoro>=0.8.2",
'misaki[en,ja,ko,zh]>=0.8.2',
"spacy==3.7.2",
"en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl",
"inflect>=7.5.0",

View file

@ -11,6 +11,11 @@ export MODEL_DIR=src/models
export VOICES_DIR=src/voices/v1_0
export WEB_PLAYER_PATH=$PROJECT_ROOT/web
# Optional: environment variables for the Chinese model (uncomment to use)
# export DEFAULT_VOICE=zf_xiaobei
# export REPO_ID=hexgrad/Kokoro-82M-v1.1-zh
# export KOKORO_V1_FILE=v1_1-zh/kokoro-v1_1-zh.pth
# Run FastAPI with GPU extras using uv run
# Note: espeak may still require manual installation,
uv pip install -e ".[gpu]"