From 68cb097d9b47744e4523faee5adee48aee1a7e9a Mon Sep 17 00:00:00 2001 From: Fireblade2534 Date: Tue, 11 Feb 2025 14:05:14 +0000 Subject: [PATCH] Merged from origin/master --- api/src/core/config.py | 1 + api/src/inference/kokoro_v1.py | 14 ++++++++++---- docker/gpu/Dockerfile | 33 ++++++++++++++++----------------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/api/src/core/config.py b/api/src/core/config.py index d361a5c..f5fd569 100644 --- a/api/src/core/config.py +++ b/api/src/core/config.py @@ -13,6 +13,7 @@ class Settings(BaseSettings): output_dir: str = "output" output_dir_size_limit_mb: float = 500.0 # Maximum size of output directory in MB default_voice: str = "af_heart" + default_voice_code: str | None = None # If set, overrides the first letter of voice name, though api call param still takes precedence use_gpu: bool = True # Whether to use GPU acceleration if available allow_local_voice_saving: bool = ( False # Whether to allow saving combined voices locally diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py index 248593e..99e76fa 100644 --- a/api/src/inference/kokoro_v1.py +++ b/api/src/inference/kokoro_v1.py @@ -139,8 +139,14 @@ class KokoroV1(BaseModelBackend): await paths.save_voice_tensor(voice_tensor, temp_path) voice_path = temp_path - # Use provided lang_code or get from voice name - pipeline_lang_code = lang_code if lang_code else voice_name[0].lower() + # Use provided lang_code, settings voice code override, or first letter of voice name + if lang_code: # api is given priority + pipeline_lang_code = lang_code + elif settings.default_voice_code: # settings is next priority + pipeline_lang_code = settings.default_voice_code + else: # voice name is default/fallback + pipeline_lang_code = voice_name[0].lower() + pipeline = self._get_pipeline(pipeline_lang_code) logger.debug( @@ -231,8 +237,8 @@ class KokoroV1(BaseModelBackend): await paths.save_voice_tensor(voice_tensor, temp_path) voice_path = temp_path - 
# Use provided lang_code or get from voice name - pipeline_lang_code = lang_code if lang_code else voice_name[0].lower() + # Use provided lang_code, settings voice code override, or first letter of voice name + pipeline_lang_code = lang_code if lang_code else (settings.default_voice_code if settings.default_voice_code else voice_name[0].lower()) pipeline = self._get_pipeline(pipeline_lang_code) logger.debug( diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile index b19cf80..7e4606e 100644 --- a/docker/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -12,51 +12,50 @@ RUN apt-get update && apt-get install -y \ libsndfile1 \ curl \ ffmpeg \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ + g++ \ + && apt-get clean && rm -rf /var/lib/apt/lists/* \ && mkdir -p /usr/share/espeak-ng-data \ && ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ # Install UV using the installer script RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ mv /root/.local/bin/uv /usr/local/bin/ && \ - mv /root/.local/bin/uvx /usr/local/bin/ - -# Create non-root user and set up directories and permissions -RUN useradd -m -u 1001 appuser && \ - mkdir -p /app/api/src/models/v1_0 && \ - chown -R appuser:appuser /app + mv /root/.local/bin/uvx /usr/local/bin/ +# Create non-root user and set up directories and permissions +RUN useradd -m -u 1000 appuser && \ + mkdir -p /app/api/src/models/v1_0 && \ + chown -R appuser:appuser /app + USER appuser WORKDIR /app # Copy dependency files COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml +ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \ + PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \ + ESPEAK_DATA_PATH=/usr/share/espeak-ng-data + # Install dependencies with GPU extras (using cache mounts) RUN --mount=type=cache,target=/root/.cache/uv \ uv venv --python 3.11 && \ uv sync --extra gpu -# Copy project files including models and sync again +# Copy project files including models COPY --chown=appuser:appuser api ./api COPY 
--chown=appuser:appuser web ./web COPY --chown=appuser:appuser docker/scripts/ ./ RUN chmod +x ./entrypoint.sh -RUN sed -i 's/\r$//' ./entrypoint.sh -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --extra gpu + # Set all environment variables in one go ENV PYTHONUNBUFFERED=1 \ PYTHONPATH=/app:/app/api \ PATH="/app/.venv/bin:$PATH" \ UV_LINK_MODE=copy \ - USE_GPU=true \ - PHONEMIZER_ESPEAK_PATH=/usr/bin \ - PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \ - ESPEAK_DATA_PATH=/usr/share/espeak-ng-data - + USE_GPU=true + ENV DOWNLOAD_MODEL=true # Download model if enabled RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \