Merge pull request #74 from richardr1126/master

Try to add AAC audio format w/ updated test
This commit is contained in:
remsky 2025-01-17 23:37:54 -07:00 committed by GitHub
commit dba8220627
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 31 additions and 10 deletions

View file

@@ -6,6 +6,7 @@ import numpy as np
import scipy.io.wavfile as wavfile import scipy.io.wavfile as wavfile
import soundfile as sf import soundfile as sf
from loguru import logger from loguru import logger
from pydub import AudioSegment
from ..core.config import settings from ..core.config import settings
@@ -52,6 +53,9 @@ class AudioService:
"flac": { "flac": {
"compression_level": 0.0, # Light compression, still fast "compression_level": 0.0, # Light compression, still fast
}, },
"aac": {
"bitrate": "192k", # Default AAC bitrate
},
} }
@staticmethod @staticmethod
@@ -144,9 +148,22 @@ class AudioService:
subtype="PCM_16", subtype="PCM_16",
**settings, **settings,
) )
elif output_format == "aac": elif output_format == "aac":
raise ValueError( # Convert numpy array directly to AAC using pydub
"Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm." audio_segment = AudioSegment(
normalized_audio.tobytes(),
frame_rate=sample_rate,
sample_width=normalized_audio.dtype.itemsize,
channels=1 if len(normalized_audio.shape) == 1 else normalized_audio.shape[1]
)
settings = format_settings.get("aac", {}) if format_settings else {}
settings = {**AudioService.DEFAULT_SETTINGS["aac"], **settings}
audio_segment.export(
buffer,
format="adts", # ADTS is a common AAC container format
bitrate=settings["bitrate"]
) )
else: else:
raise ValueError( raise ValueError(

View file

@@ -58,14 +58,14 @@ def test_convert_to_flac(sample_audio):
assert len(result) > 0 assert len(result) > 0
def test_convert_to_aac_raises_error(sample_audio): def test_convert_to_aac(sample_audio):
"""Test that converting to AAC raises an error""" """Test converting to AAC format"""
audio_data, sample_rate = sample_audio audio_data, sample_rate = sample_audio
with pytest.raises( result = AudioService.convert_audio(audio_data, sample_rate, "aac")
ValueError, assert isinstance(result, bytes)
match="Failed to convert audio to aac: Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm.", assert len(result) > 0
): # AAC files typically start with an ADTS header
AudioService.convert_audio(audio_data, sample_rate, "aac") assert result.startswith(b'\xff\xf1') or result.startswith(b'\xff\xf9')
def test_convert_to_pcm(sample_audio): def test_convert_to_pcm(sample_audio):

View file

@@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
git \ git \
libsndfile1 \ libsndfile1 \
curl \ curl \
ffmpeg \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*

View file

@@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
git \ git \
libsndfile1 \ libsndfile1 \
curl \ curl \
ffmpeg \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*

View file

@@ -33,6 +33,7 @@ dependencies = [
"munch==4.0.0", "munch==4.0.0",
"tiktoken==0.8.0", "tiktoken==0.8.0",
"loguru==0.7.3", "loguru==0.7.3",
"pydub>=0.25.1",
] ]
[project.optional-dependencies] [project.optional-dependencies]

View file

@@ -33,6 +33,7 @@ dependencies = [
"openai>=1.59.6", "openai>=1.59.6",
"ebooklib>=0.18", "ebooklib>=0.18",
"html2text>=2024.2.26", "html2text>=2024.2.26",
"pydub>=0.25.1",
] ]
[project.optional-dependencies] [project.optional-dependencies]