diff --git a/api/src/core/config.py b/api/src/core/config.py index 9e2e61f..dcd9851 100644 --- a/api/src/core/config.py +++ b/api/src/core/config.py @@ -18,6 +18,8 @@ class Settings(BaseSettings): allow_local_voice_saving: bool = ( False # Whether to allow saving combined voices locally ) + repo_id: str = "hexgrad/Kokoro-82M" + kokoro_v1_file: str = "v1_0/kokoro-v1_0.pth" # Container absolute paths model_dir: str = "/app/api/src/models" # Absolute path in container diff --git a/api/src/core/model_config.py b/api/src/core/model_config.py index 47544c1..e518740 100644 --- a/api/src/core/model_config.py +++ b/api/src/core/model_config.py @@ -6,6 +6,7 @@ this module focuses on memory management and model file paths. """ from pydantic import BaseModel, Field +from core.config import settings class KokoroV1Config(BaseModel): @@ -36,7 +37,7 @@ class ModelConfig(BaseModel): # Model filename pytorch_kokoro_v1_file: str = Field( - "v1_0/kokoro-v1_0.pth", description="PyTorch Kokoro V1 model filename" + settings.kokoro_v1_file, description="PyTorch Kokoro V1 model filename" ) # Backend config diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py index cc90023..fd0bdd4 100644 --- a/api/src/inference/kokoro_v1.py +++ b/api/src/inference/kokoro_v1.py @@ -47,7 +47,7 @@ class KokoroV1(BaseModelBackend): logger.info(f"Model path: {model_path}") # Load model and let KModel handle device mapping - self._model = KModel(config=config_path, model=model_path).eval() + self._model = KModel(config=config_path, model=model_path, repo_id=settings.repo_id).eval() # Move to CUDA if needed if self._device == "cuda": self._model = self._model.cuda() @@ -57,6 +57,9 @@ class KokoroV1(BaseModelBackend): except Exception as e: raise RuntimeError(f"Failed to load Kokoro model: {e}") + def en_callable(self, text): + return next(self._pipelines['a'](text)).phonemes + def _get_pipeline(self, lang_code: str) -> KPipeline: """Get or create pipeline for language code. 
@@ -69,10 +72,19 @@ class KokoroV1(BaseModelBackend): if not self._model: raise RuntimeError("Model not loaded") + # Chinese ('z') input may contain embedded English; pre-create the English ('a') + # pipeline so en_callable can use it as the English phonemizer fallback. + if 'a' not in self._pipelines and lang_code == 'z': + lang_en = 'a' + logger.info(f"Creating new pipeline for language code: {lang_en}") + self._pipelines[lang_en] = KPipeline( + lang_code=lang_en, model=False, repo_id=settings.repo_id + ) + if lang_code not in self._pipelines: logger.info(f"Creating new pipeline for language code: {lang_code}") self._pipelines[lang_code] = KPipeline( - lang_code=lang_code, model=self._model, device=self._device + lang_code=lang_code, model=self._model, device=self._device, repo_id=settings.repo_id, + en_callable=self.en_callable ) return self._pipelines[lang_code] diff --git a/pyproject.toml b/pyproject.toml index d0ff675..044437a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,8 +31,8 @@ dependencies = [ "matplotlib>=3.10.0", "mutagen>=1.47.0", "psutil>=6.1.1", - "kokoro @ git+https://github.com/hexgrad/kokoro.git@31a2b6337b8c1b1418ef68c48142328f640da938", - 'misaki[en,ja,ko,zh] @ git+https://github.com/hexgrad/misaki.git@ebc76c21b66c5fc4866ed0ec234047177b396170', + "kokoro>=0.8.2", + 'misaki[en,ja,ko,zh]>=0.8.2', "spacy==3.7.2", "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl", "inflect>=7.5.0", diff --git a/start-gpu.sh b/start-gpu.sh index a3a2e68..c1379e4 100755 --- a/start-gpu.sh +++ b/start-gpu.sh @@ -11,6 +11,11 @@ export MODEL_DIR=src/models export VOICES_DIR=src/voices/v1_0 export WEB_PLAYER_PATH=$PROJECT_ROOT/web +# Optional environment variables for the Chinese (zh) model; uncomment to enable. +# export DEFAULT_VOICE=zf_xiaobei +# export REPO_ID=hexgrad/Kokoro-82M-v1.1-zh +# export KOKORO_V1_FILE=v1_1-zh/kokoro-v1_1-zh.pth +# Run FastAPI with GPU extras using uv run # Note: espeak may still require manual installation, uv pip install -e ".[gpu]"