mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
add AAC audio format and test
This commit is contained in:
parent
57f3cf6338
commit
d51d861861
6 changed files with 31 additions and 10 deletions
|
@ -6,6 +6,7 @@ import numpy as np
|
|||
import scipy.io.wavfile as wavfile
|
||||
import soundfile as sf
|
||||
from loguru import logger
|
||||
from pydub import AudioSegment
|
||||
|
||||
from ..core.config import settings
|
||||
|
||||
|
@ -52,6 +53,9 @@ class AudioService:
|
|||
"flac": {
|
||||
"compression_level": 0.0, # Light compression, still fast
|
||||
},
|
||||
"aac": {
|
||||
"bitrate": "192k", # Default AAC bitrate
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
|
@ -144,9 +148,22 @@ class AudioService:
|
|||
subtype="PCM_16",
|
||||
**settings,
|
||||
)
|
||||
elif output_format == "aac":
|
||||
raise ValueError(
|
||||
"Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm."
|
||||
elif output_format == "aac":
|
||||
# Wrap the raw PCM samples in a pydub AudioSegment; pydub encodes to AAC by invoking ffmpeg
|
||||
audio_segment = AudioSegment(
|
||||
normalized_audio.tobytes(),
|
||||
frame_rate=sample_rate,
|
||||
sample_width=normalized_audio.dtype.itemsize,
|
||||
channels=1 if len(normalized_audio.shape) == 1 else normalized_audio.shape[1]
|
||||
)
|
||||
|
||||
settings = format_settings.get("aac", {}) if format_settings else {}
|
||||
settings = {**AudioService.DEFAULT_SETTINGS["aac"], **settings}
|
||||
|
||||
audio_segment.export(
|
||||
buffer,
|
||||
format="adts", # ADTS is a common AAC container format
|
||||
bitrate=settings["bitrate"]
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
|
|
|
@ -58,14 +58,14 @@ def test_convert_to_flac(sample_audio):
|
|||
assert len(result) > 0
|
||||
|
||||
|
||||
def test_convert_to_aac_raises_error(sample_audio):
|
||||
"""Test that converting to AAC raises an error"""
|
||||
def test_convert_to_aac(sample_audio):
|
||||
"""Test converting to AAC format"""
|
||||
audio_data, sample_rate = sample_audio
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="Failed to convert audio to aac: Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm.",
|
||||
):
|
||||
AudioService.convert_audio(audio_data, sample_rate, "aac")
|
||||
result = AudioService.convert_audio(audio_data, sample_rate, "aac")
|
||||
assert isinstance(result, bytes)
|
||||
assert len(result) > 0
|
||||
# ADTS frames begin with the 0xFFF syncword; 0xF1 / 0xF9 are the MPEG-4 / MPEG-2 headers without CRC
|
||||
assert result.startswith(b'\xff\xf1') or result.startswith(b'\xff\xf9')
|
||||
|
||||
|
||||
def test_convert_to_pcm(sample_audio):
|
||||
|
|
|
@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
git \
|
||||
libsndfile1 \
|
||||
curl \
|
||||
ffmpeg \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
git \
|
||||
libsndfile1 \
|
||||
curl \
|
||||
ffmpeg \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ dependencies = [
|
|||
"munch==4.0.0",
|
||||
"tiktoken==0.8.0",
|
||||
"loguru==0.7.3",
|
||||
"pydub>=0.25.1",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
|
|
@ -33,6 +33,7 @@ dependencies = [
|
|||
"openai>=1.59.6",
|
||||
"ebooklib>=0.18",
|
||||
"html2text>=2024.2.26",
|
||||
"pydub>=0.25.1",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
|
Loading…
Add table
Reference in a new issue