mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
Merge 6e617ed5f3
into 9091be8c3e
This commit is contained in:
commit
19fdff3431
5 changed files with 25 additions and 5 deletions
|
@ -18,6 +18,8 @@ class Settings(BaseSettings):
|
|||
allow_local_voice_saving: bool = (
|
||||
False # Whether to allow saving combined voices locally
|
||||
)
|
||||
repo_id: str = "hexgrad/Kokoro-82M"
|
||||
kokoro_v1_file: str = "v1_0/kokoro-v1_0.pth"
|
||||
|
||||
# Container absolute paths
|
||||
model_dir: str = "/app/api/src/models" # Absolute path in container
|
||||
|
|
|
@ -6,6 +6,7 @@ this module focuses on memory management and model file paths.
|
|||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from core.config import settings
|
||||
|
||||
|
||||
class KokoroV1Config(BaseModel):
|
||||
|
@ -36,7 +37,7 @@ class ModelConfig(BaseModel):
|
|||
|
||||
# Model filename
|
||||
pytorch_kokoro_v1_file: str = Field(
|
||||
"v1_0/kokoro-v1_0.pth", description="PyTorch Kokoro V1 model filename"
|
||||
settings.kokoro_v1_file, description="PyTorch Kokoro V1 model filename"
|
||||
)
|
||||
|
||||
# Backend config
|
||||
|
|
|
@ -47,7 +47,7 @@ class KokoroV1(BaseModelBackend):
|
|||
logger.info(f"Model path: {model_path}")
|
||||
|
||||
# Load model and let KModel handle device mapping
|
||||
self._model = KModel(config=config_path, model=model_path).eval()
|
||||
self._model = KModel(config=config_path, model=model_path, repo_id=settings.repo_id).eval()
|
||||
# Move to CUDA if needed
|
||||
if self._device == "cuda":
|
||||
self._model = self._model.cuda()
|
||||
|
@ -57,6 +57,9 @@ class KokoroV1(BaseModelBackend):
|
|||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to load Kokoro model: {e}")
|
||||
|
||||
def en_callable(self, text):
|
||||
return next(self._pipelines['a'](text)).phonemes
|
||||
|
||||
def _get_pipeline(self, lang_code: str) -> KPipeline:
|
||||
"""Get or create pipeline for language code.
|
||||
|
||||
|
@ -69,10 +72,19 @@ class KokoroV1(BaseModelBackend):
|
|||
if not self._model:
|
||||
raise RuntimeError("Model not loaded")
|
||||
|
||||
# Chinese text mixed with English is phonemized through en_callable, which uses the English ('a') pipeline, so create that pipeline first.
|
||||
if 'a' not in self._pipelines and lang_code == 'z':
|
||||
lang_en = 'a'
|
||||
logger.info(f"Creating new pipeline for language code: {lang_en}")
|
||||
self._pipelines[lang_en] = KPipeline(
|
||||
lang_code=lang_en, model=False, repo_id=settings.repo_id
|
||||
)
|
||||
|
||||
if lang_code not in self._pipelines:
|
||||
logger.info(f"Creating new pipeline for language code: {lang_code}")
|
||||
self._pipelines[lang_code] = KPipeline(
|
||||
lang_code=lang_code, model=self._model, device=self._device
|
||||
lang_code=lang_code, model=self._model, device=self._device, repo_id=settings.repo_id,
|
||||
en_callable=self.en_callable
|
||||
)
|
||||
return self._pipelines[lang_code]
|
||||
|
||||
|
|
|
@ -31,8 +31,8 @@ dependencies = [
|
|||
"matplotlib>=3.10.0",
|
||||
"mutagen>=1.47.0",
|
||||
"psutil>=6.1.1",
|
||||
"kokoro @ git+https://github.com/hexgrad/kokoro.git@31a2b6337b8c1b1418ef68c48142328f640da938",
|
||||
'misaki[en,ja,ko,zh] @ git+https://github.com/hexgrad/misaki.git@ebc76c21b66c5fc4866ed0ec234047177b396170',
|
||||
"kokoro>=0.8.2",
|
||||
'misaki[en,ja,ko,zh]>=0.8.2',
|
||||
"spacy==3.7.2",
|
||||
"en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl",
|
||||
"inflect>=7.5.0",
|
||||
|
|
|
@ -11,6 +11,11 @@ export MODEL_DIR=src/models
|
|||
export VOICES_DIR=src/voices/v1_0
|
||||
export WEB_PLAYER_PATH=$PROJECT_ROOT/web
|
||||
|
||||
# Environment variables for the Chinese model (uncomment to enable)
|
||||
# export DEFAULT_VOICE=zf_xiaobei
|
||||
# export REPO_ID=hexgrad/Kokoro-82M-v1.1-zh
|
||||
# export KOKORO_V1_FILE=v1_1-zh/kokoro-v1_1-zh.pth
|
||||
|
||||
# Run FastAPI with GPU extras using uv run
|
||||
# Note: espeak may still require manual installation,
|
||||
uv pip install -e ".[gpu]"
|
||||
|
|
Loading…
Add table
Reference in a new issue