Merged from origin/master

2025-08-05 08:38:49 +00:00 · 2025-02-11 14:05:14 +00:00 · 2025-02-11 14:05:14 +00:00 · 68cb097d9b
commit 68cb097d9b
parent 737e49a3f9
3 changed files with 27 additions and 21 deletions
--- a/api/src/core/config.py
+++ b/api/src/core/config.py
@ -13,6 +13,7 @@ class Settings(BaseSettings):
    output_dir: str = "output"
    output_dir_size_limit_mb: float = 500.0  # Maximum size of output directory in MB
    default_voice: str = "af_heart"
    default_voice_code: str | None = None  # If set, overrides the first letter of voice name, though api call param still takes precedence
    use_gpu: bool = True  # Whether to use GPU acceleration if available
    allow_local_voice_saving: bool = (
        False  # Whether to allow saving combined voices locally
--- a/api/src/inference/kokoro_v1.py
+++ b/api/src/inference/kokoro_v1.py
@ -139,8 +139,14 @@ class KokoroV1(BaseModelBackend):
            await paths.save_voice_tensor(voice_tensor, temp_path)
            voice_path = temp_path
-            # Use provided lang_code or get from voice name
+            # Use provided lang_code, settings voice code override, or first letter of voice name
-            pipeline_lang_code = lang_code if lang_code else voice_name[0].lower()
+            if lang_code: # api is given priority
                pipeline_lang_code = lang_code
            elif settings.default_voice_code: # settings is next priority
                pipeline_lang_code = settings.default_voice_code
            else: # voice name is default/fallback
                pipeline_lang_code = voice_name[0].lower()
            pipeline = self._get_pipeline(pipeline_lang_code)
            logger.debug(
@ -231,8 +237,8 @@ class KokoroV1(BaseModelBackend):
            await paths.save_voice_tensor(voice_tensor, temp_path)
            voice_path = temp_path
-            # Use provided lang_code or get from voice name
+            # Use provided lang_code, settings voice code override, or first letter of voice name
-            pipeline_lang_code = lang_code if lang_code else voice_name[0].lower()
+            pipeline_lang_code = lang_code if lang_code else (settings.default_voice_code if settings.default_voice_code else voice_name[0].lower())
            pipeline = self._get_pipeline(pipeline_lang_code)
            logger.debug(
--- a/docker/gpu/Dockerfile
+++ b/docker/gpu/Dockerfile
@ -12,8 +12,8 @@ RUN apt-get update && apt-get install -y \
    libsndfile1 \
    curl \
    ffmpeg \
- && apt-get clean \
+    g++ \
- && rm -rf /var/lib/apt/lists/* \
+ && apt-get clean && rm -rf /var/lib/apt/lists/* \
 && mkdir -p /usr/share/espeak-ng-data \
 && ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/
@ -23,7 +23,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
    mv /root/.local/bin/uvx /usr/local/bin/ 
 # Create non-root user and set up directories and permissions
-RUN useradd -m -u 1001 appuser && \
+RUN useradd -m -u 1000 appuser && \
    mkdir -p /app/api/src/models/v1_0 && \
    chown -R appuser:appuser /app
@ -33,29 +33,28 @@ WORKDIR /app
 # Copy dependency files
 COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
 ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \
    PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
    ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
 # Install dependencies with GPU extras (using cache mounts)
 RUN --mount=type=cache,target=/root/.cache/uv \
    uv venv --python 3.11 && \
    uv sync --extra gpu
-# Copy project files including models and sync again
+# Copy project files including models
 COPY --chown=appuser:appuser api ./api
 COPY --chown=appuser:appuser web ./web
 COPY --chown=appuser:appuser docker/scripts/ ./
 RUN chmod +x ./entrypoint.sh
-RUN sed -i 's/\r$//' ./entrypoint.sh
+
 RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --extra gpu
 # Set all environment variables in one go
 ENV PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app:/app/api \
    PATH="/app/.venv/bin:$PATH" \
    UV_LINK_MODE=copy \
-    USE_GPU=true \
+    USE_GPU=true 
    PHONEMIZER_ESPEAK_PATH=/usr/bin \
    PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
    ESPEAK_DATA_PATH=/usr/share/espeak-ng-data
 ENV DOWNLOAD_MODEL=true
 # Download model if enabled