mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
Ruff check
This commit is contained in:
parent
6a2d3a54cf
commit
447f9d360c
29 changed files with 63 additions and 43 deletions
|
@ -1,5 +1,5 @@
|
||||||
from pydantic_settings import BaseSettings
|
|
||||||
import torch
|
import torch
|
||||||
|
from pydantic_settings import BaseSettings
|
||||||
|
|
||||||
|
|
||||||
class Settings(BaseSettings):
|
class Settings(BaseSettings):
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
"""Base interface for Kokoro inference."""
|
"""Base interface for Kokoro inference."""
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import AsyncGenerator, Optional, Tuple, Union, List
|
from typing import AsyncGenerator, List, Optional, Tuple, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
class AudioChunk:
|
class AudioChunk:
|
||||||
"""Class for audio chunks returned by model backends"""
|
"""Class for audio chunks returned by model backends"""
|
||||||
|
|
||||||
|
|
|
@ -11,9 +11,10 @@ from loguru import logger
|
||||||
from ..core import paths
|
from ..core import paths
|
||||||
from ..core.config import settings
|
from ..core.config import settings
|
||||||
from ..core.model_config import model_config
|
from ..core.model_config import model_config
|
||||||
from .base import BaseModelBackend
|
|
||||||
from .base import AudioChunk
|
|
||||||
from ..structures.schemas import WordTimestamp
|
from ..structures.schemas import WordTimestamp
|
||||||
|
from .base import AudioChunk, BaseModelBackend
|
||||||
|
|
||||||
|
|
||||||
class KokoroV1(BaseModelBackend):
|
class KokoroV1(BaseModelBackend):
|
||||||
"""Kokoro backend with controlled resource management."""
|
"""Kokoro backend with controlled resource management."""
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,8 @@ import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import psutil
|
import psutil
|
||||||
from fastapi import APIRouter
|
|
||||||
import torch
|
import torch
|
||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import GPUtil
|
import GPUtil
|
||||||
|
|
|
@ -10,18 +10,18 @@ from urllib import response
|
||||||
|
|
||||||
import aiofiles
|
import aiofiles
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from ..services.streaming_audio_writer import StreamingAudioWriter
|
|
||||||
import torch
|
import torch
|
||||||
from fastapi import APIRouter, Depends, Header, HTTPException, Request, Response
|
from fastapi import APIRouter, Depends, Header, HTTPException, Request, Response
|
||||||
from fastapi.responses import FileResponse, StreamingResponse
|
from fastapi.responses import FileResponse, StreamingResponse
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from ..structures.schemas import CaptionedSpeechRequest
|
|
||||||
|
|
||||||
from ..core.config import settings
|
from ..core.config import settings
|
||||||
from ..inference.base import AudioChunk
|
from ..inference.base import AudioChunk
|
||||||
from ..services.audio import AudioService
|
from ..services.audio import AudioService
|
||||||
|
from ..services.streaming_audio_writer import StreamingAudioWriter
|
||||||
from ..services.tts_service import TTSService
|
from ..services.tts_service import TTSService
|
||||||
from ..structures import OpenAISpeechRequest
|
from ..structures import OpenAISpeechRequest
|
||||||
|
from ..structures.schemas import CaptionedSpeechRequest
|
||||||
|
|
||||||
|
|
||||||
# Load OpenAI mappings
|
# Load OpenAI mappings
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
"""Audio conversion service"""
|
"""Audio conversion service"""
|
||||||
|
|
||||||
|
import math
|
||||||
import struct
|
import struct
|
||||||
import time
|
import time
|
||||||
from typing import Tuple
|
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import math
|
|
||||||
import scipy.io.wavfile as wavfile
|
import scipy.io.wavfile as wavfile
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
@ -14,8 +14,9 @@ from pydub import AudioSegment
|
||||||
from torch import norm
|
from torch import norm
|
||||||
|
|
||||||
from ..core.config import settings
|
from ..core.config import settings
|
||||||
from .streaming_audio_writer import StreamingAudioWriter
|
|
||||||
from ..inference.base import AudioChunk
|
from ..inference.base import AudioChunk
|
||||||
|
from .streaming_audio_writer import StreamingAudioWriter
|
||||||
|
|
||||||
|
|
||||||
class AudioNormalizer:
|
class AudioNormalizer:
|
||||||
"""Handles audio normalization state for a single stream"""
|
"""Handles audio normalization state for a single stream"""
|
||||||
|
|
|
@ -4,11 +4,12 @@ import struct
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
import av
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from pydub import AudioSegment
|
from pydub import AudioSegment
|
||||||
import av
|
|
||||||
|
|
||||||
class StreamingAudioWriter:
|
class StreamingAudioWriter:
|
||||||
"""Handles streaming audio format conversions"""
|
"""Handles streaming audio format conversions"""
|
||||||
|
|
|
@ -6,12 +6,13 @@ Converts them into a format suitable for text-to-speech processing.
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
|
||||||
import inflect
|
import inflect
|
||||||
from numpy import number
|
from numpy import number
|
||||||
from torch import mul
|
|
||||||
from ...structures.schemas import NormalizationOptions
|
|
||||||
|
|
||||||
from text_to_num import text2num
|
from text_to_num import text2num
|
||||||
|
from torch import mul
|
||||||
|
|
||||||
|
from ...structures.schemas import NormalizationOptions
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
VALID_TLDS = [
|
VALID_TLDS = [
|
||||||
|
|
|
@ -7,10 +7,10 @@ from typing import AsyncGenerator, Dict, List, Tuple
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from ...core.config import settings
|
from ...core.config import settings
|
||||||
|
from ...structures.schemas import NormalizationOptions
|
||||||
from .normalizer import normalize_text
|
from .normalizer import normalize_text
|
||||||
from .phonemizer import phonemize
|
from .phonemizer import phonemize
|
||||||
from .vocabulary import tokenize
|
from .vocabulary import tokenize
|
||||||
from ...structures.schemas import NormalizationOptions
|
|
||||||
|
|
||||||
# Pre-compiled regex patterns for performance
|
# Pre-compiled regex patterns for performance
|
||||||
CUSTOM_PHONEMES = re.compile(r"(\[([^\]]|\n)*?\])(\(\/([^\/)]|\n)*?\/\))")
|
CUSTOM_PHONEMES = re.compile(r"(\[([^\]]|\n)*?\])(\(\/([^\/)]|\n)*?\/\))")
|
||||||
|
|
|
@ -8,7 +8,6 @@ import time
|
||||||
from typing import AsyncGenerator, List, Optional, Tuple, Union
|
from typing import AsyncGenerator, List, Optional, Tuple, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .streaming_audio_writer import StreamingAudioWriter
|
|
||||||
import torch
|
import torch
|
||||||
from kokoro import KPipeline
|
from kokoro import KPipeline
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
@ -20,6 +19,7 @@ from ..inference.model_manager import get_manager as get_model_manager
|
||||||
from ..inference.voice_manager import get_manager as get_voice_manager
|
from ..inference.voice_manager import get_manager as get_voice_manager
|
||||||
from ..structures.schemas import NormalizationOptions
|
from ..structures.schemas import NormalizationOptions
|
||||||
from .audio import AudioNormalizer, AudioService
|
from .audio import AudioNormalizer, AudioService
|
||||||
|
from .streaming_audio_writer import StreamingAudioWriter
|
||||||
from .text_processing import tokenize
|
from .text_processing import tokenize
|
||||||
from .text_processing.text_processor import process_text_chunk, smart_split
|
from .text_processing.text_processor import process_text_chunk, smart_split
|
||||||
|
|
||||||
|
|
|
@ -5,9 +5,11 @@ from unittest.mock import patch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from api.src.services.audio import AudioNormalizer, AudioService
|
|
||||||
from api.src.inference.base import AudioChunk
|
from api.src.inference.base import AudioChunk
|
||||||
|
from api.src.services.audio import AudioNormalizer, AudioService
|
||||||
from api.src.services.streaming_audio_writer import StreamingAudioWriter
|
from api.src.services.streaming_audio_writer import StreamingAudioWriter
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def mock_settings():
|
def mock_settings():
|
||||||
"""Mock settings for all tests"""
|
"""Mock settings for all tests"""
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
import pytest
|
|
||||||
from unittest.mock import patch, MagicMock
|
|
||||||
import requests
|
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
def test_generate_captioned_speech():
|
def test_generate_captioned_speech():
|
||||||
"""Test the generate_captioned_speech function with mocked responses"""
|
"""Test the generate_captioned_speech function with mocked responses"""
|
||||||
|
|
|
@ -5,6 +5,7 @@ import pytest
|
||||||
from api.src.services.text_processing.normalizer import normalize_text
|
from api.src.services.text_processing.normalizer import normalize_text
|
||||||
from api.src.structures.schemas import NormalizationOptions
|
from api.src.structures.schemas import NormalizationOptions
|
||||||
|
|
||||||
|
|
||||||
def test_url_protocols():
|
def test_url_protocols():
|
||||||
"""Test URL protocol handling"""
|
"""Test URL protocol handling"""
|
||||||
assert (
|
assert (
|
||||||
|
|
|
@ -4,20 +4,19 @@ import os
|
||||||
from typing import AsyncGenerator, Tuple
|
from typing import AsyncGenerator, Tuple
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
from api.src.services.streaming_audio_writer import StreamingAudioWriter
|
|
||||||
|
|
||||||
from api.src.inference.base import AudioChunk
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
from fastapi.testclient import TestClient
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
from api.src.core.config import settings
|
from api.src.core.config import settings
|
||||||
|
from api.src.inference.base import AudioChunk
|
||||||
from api.src.main import app
|
from api.src.main import app
|
||||||
from api.src.routers.openai_compatible import (
|
from api.src.routers.openai_compatible import (
|
||||||
get_tts_service,
|
get_tts_service,
|
||||||
load_openai_mappings,
|
load_openai_mappings,
|
||||||
stream_audio_chunks,
|
stream_audio_chunks,
|
||||||
)
|
)
|
||||||
|
from api.src.services.streaming_audio_writer import StreamingAudioWriter
|
||||||
from api.src.services.tts_service import TTSService
|
from api.src.services.tts_service import TTSService
|
||||||
from api.src.structures.schemas import OpenAISpeechRequest
|
from api.src.structures.schemas import OpenAISpeechRequest
|
||||||
|
|
||||||
|
|
|
@ -17,12 +17,13 @@ import base64
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import requests
|
import sys
|
||||||
import time
|
import time
|
||||||
import wave
|
import wave
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
def setup_args():
|
def setup_args():
|
||||||
"""Parse command line arguments"""
|
"""Parse command line arguments"""
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
import requests
|
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
|
|
||||||
import pydub
|
import pydub
|
||||||
|
import requests
|
||||||
|
|
||||||
text="""Delving into the Abyss: A Deeper Exploration of Meaning in 5 Seconds of Summer's "Jet Black Heart"
|
text="""Delving into the Abyss: A Deeper Exploration of Meaning in 5 Seconds of Summer's "Jet Black Heart"
|
||||||
|
|
||||||
5 Seconds of Summer, initially perceived as purveyors of upbeat, radio-friendly pop-punk, embarked on a significant artistic evolution with their album Sounds Good Feels Good. Among its tracks, "Jet Black Heart" stands out as a powerful testament to this shift, moving beyond catchy melodies and embracing a darker, more emotionally complex sound. Released in 2015, the song transcends the typical themes of youthful exuberance and romantic angst, instead plunging into the depths of personal turmoil and the corrosive effects of inner darkness on interpersonal relationships. "Jet Black Heart" is not merely a song about heartbreak; it is a raw and vulnerable exploration of internal struggle, self-destructive patterns, and the precarious flicker of hope that persists even in the face of profound emotional chaos. Through potent metaphors, starkly honest lyrics, and a sonic landscape that mirrors its thematic weight, the song offers a profound meditation on the human condition, grappling with the shadows that reside within us all and their far-reaching consequences.
|
5 Seconds of Summer, initially perceived as purveyors of upbeat, radio-friendly pop-punk, embarked on a significant artistic evolution with their album Sounds Good Feels Good. Among its tracks, "Jet Black Heart" stands out as a powerful testament to this shift, moving beyond catchy melodies and embracing a darker, more emotionally complex sound. Released in 2015, the song transcends the typical themes of youthful exuberance and romantic angst, instead plunging into the depths of personal turmoil and the corrosive effects of inner darkness on interpersonal relationships. "Jet Black Heart" is not merely a song about heartbreak; it is a raw and vulnerable exploration of internal struggle, self-destructive patterns, and the precarious flicker of hope that persists even in the face of profound emotional chaos. Through potent metaphors, starkly honest lyrics, and a sonic landscape that mirrors its thematic weight, the song offers a profound meditation on the human condition, grappling with the shadows that reside within us all and their far-reaching consequences.
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
import requests
|
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
text="""the administration has offered up a platter of repression for more than a year and is still slated to lose $400 million.
|
text="""the administration has offered up a platter of repression for more than a year and is still slated to lose $400 million.
|
||||||
|
|
||||||
Columbia is the largest private landowner in New York City and boasts an endowment of $14.8 billion;"""
|
Columbia is the largest private landowner in New York City and boasts an endowment of $14.8 billion;"""
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from text_to_num import text2num
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import inflect
|
import inflect
|
||||||
|
from text_to_num import text2num
|
||||||
from torch import mul
|
from torch import mul
|
||||||
|
|
||||||
INFLECT_ENGINE = inflect.engine()
|
INFLECT_ENGINE = inflect.engine()
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
import requests
|
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
text="""Delving into the Abyss: A Deeper Exploration of Meaning in 5 Seconds of Summer's "Jet Black Heart"
|
text="""Delving into the Abyss: A Deeper Exploration of Meaning in 5 Seconds of Summer's "Jet Black Heart"
|
||||||
|
|
||||||
5 Seconds of Summer, initially perceived as purveyors of upbeat, radio-friendly pop-punk, embarked on a significant artistic evolution with their album Sounds Good Feels Good. Among its tracks, "Jet Black Heart" stands out as a powerful testament to this shift, moving beyond catchy melodies and embracing a darker, more emotionally complex sound. Released in 2015, the song transcends the typical themes of youthful exuberance and romantic angst, instead plunging into the depths of personal turmoil and the corrosive effects of inner darkness on interpersonal relationships. "Jet Black Heart" is not merely a song about heartbreak; it is a raw and vulnerable exploration of internal struggle, self-destructive patterns, and the precarious flicker of hope that persists even in the face of profound emotional chaos. Through potent metaphors, starkly honest lyrics, and a sonic landscape that mirrors its thematic weight, the song offers a profound meditation on the human condition, grappling with the shadows that reside within us all and their far-reaching consequences.
|
5 Seconds of Summer, initially perceived as purveyors of upbeat, radio-friendly pop-punk, embarked on a significant artistic evolution with their album Sounds Good Feels Good. Among its tracks, "Jet Black Heart" stands out as a powerful testament to this shift, moving beyond catchy melodies and embracing a darker, more emotionally complex sound. Released in 2015, the song transcends the typical themes of youthful exuberance and romantic angst, instead plunging into the depths of personal turmoil and the corrosive effects of inner darkness on interpersonal relationships. "Jet Black Heart" is not merely a song about heartbreak; it is a raw and vulnerable exploration of internal struggle, self-destructive patterns, and the precarious flicker of hope that persists even in the face of profound emotional chaos. Through potent metaphors, starkly honest lyrics, and a sonic landscape that mirrors its thematic weight, the song offers a profound meditation on the human condition, grappling with the shadows that reside within us all and their far-reaching consequences.
|
||||||
|
|
|
@ -2,9 +2,9 @@
|
||||||
Patch for misaki package to fix the EspeakWrapper.set_data_path issue.
|
Patch for misaki package to fix the EspeakWrapper.set_data_path issue.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import importlib.util
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import importlib.util
|
|
||||||
|
|
||||||
# Find the misaki package
|
# Find the misaki package
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import tomli
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import tomli
|
||||||
|
|
||||||
|
|
||||||
def extract_dependency_info():
|
def extract_dependency_info():
|
||||||
"""Extract version for kokoro and misaki from pyproject.toml"""
|
"""Extract version for kokoro and misaki from pyproject.toml"""
|
||||||
with open("pyproject.toml", "rb") as f:
|
with open("pyproject.toml", "rb") as f:
|
||||||
|
|
|
@ -7,9 +7,10 @@ in pyproject.toml, the Helm chart, and README.md.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import yaml
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
# Get the project root directory
|
# Get the project root directory
|
||||||
ROOT_DIR = Path(__file__).parent.parent
|
ROOT_DIR = Path(__file__).parent.parent
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import pytest
|
|
||||||
from unittest.mock import AsyncMock, Mock
|
from unittest.mock import AsyncMock, Mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from api.src.services.tts_service import TTSService
|
from api.src.services.tts_service import TTSService
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from unittest.mock import patch, mock_open
|
from unittest.mock import mock_open, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from ui.lib.config import AUDIO_FORMATS
|
|
||||||
from ui.lib.components.model import create_model_column
|
from ui.lib.components.model import create_model_column
|
||||||
from ui.lib.components.output import create_output_column
|
from ui.lib.components.output import create_output_column
|
||||||
|
from ui.lib.config import AUDIO_FORMATS
|
||||||
|
|
||||||
|
|
||||||
def test_create_model_column_structure():
|
def test_create_model_column_structure():
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import os
|
|
||||||
import datetime
|
import datetime
|
||||||
from typing import List, Tuple, Optional
|
import os
|
||||||
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Tuple, Optional
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import os
|
|
||||||
import datetime
|
import datetime
|
||||||
from typing import List, Tuple, Optional
|
import os
|
||||||
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
from .config import INPUTS_DIR, OUTPUTS_DIR, AUDIO_FORMATS
|
from .config import AUDIO_FORMATS, INPUTS_DIR, OUTPUTS_DIR
|
||||||
|
|
||||||
|
|
||||||
def list_input_files() -> List[str]:
|
def list_input_files() -> List[str]:
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
import gradio as gr
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
import gradio as gr
|
||||||
|
|
||||||
from . import api
|
from . import api
|
||||||
from .handlers import setup_event_handlers
|
|
||||||
from .components import create_input_column, create_model_column, create_output_column
|
from .components import create_input_column, create_model_column, create_output_column
|
||||||
|
from .handlers import setup_event_handlers
|
||||||
|
|
||||||
|
|
||||||
def create_interface():
|
def create_interface():
|
||||||
|
|
Loading…
Add table
Reference in a new issue