diff --git a/api/src/services/audio.py b/api/src/services/audio.py index 4e8c215..4a45608 100644 --- a/api/src/services/audio.py +++ b/api/src/services/audio.py @@ -6,6 +6,7 @@ import numpy as np import scipy.io.wavfile as wavfile import soundfile as sf from loguru import logger +from pydub import AudioSegment from ..core.config import settings @@ -52,6 +53,9 @@ class AudioService: "flac": { "compression_level": 0.0, # Light compression, still fast }, + "aac": { + "bitrate": "192k", # Default AAC bitrate + }, } @staticmethod @@ -144,9 +148,22 @@ class AudioService: subtype="PCM_16", **settings, ) - elif output_format == "aac": - raise ValueError( - "Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm." + elif output_format == "aac": + # Convert numpy array directly to AAC using pydub + audio_segment = AudioSegment( + normalized_audio.tobytes(), + frame_rate=sample_rate, + sample_width=normalized_audio.dtype.itemsize, + channels=1 if len(normalized_audio.shape) == 1 else normalized_audio.shape[1] + ) + + settings = format_settings.get("aac", {}) if format_settings else {} + settings = {**AudioService.DEFAULT_SETTINGS["aac"], **settings} + + audio_segment.export( + buffer, + format="adts", # ADTS is a common AAC container format + bitrate=settings["bitrate"] ) else: raise ValueError( diff --git a/api/tests/test_audio_service.py b/api/tests/test_audio_service.py index bb6fb36..8131c9f 100644 --- a/api/tests/test_audio_service.py +++ b/api/tests/test_audio_service.py @@ -58,14 +58,14 @@ def test_convert_to_flac(sample_audio): assert len(result) > 0 -def test_convert_to_aac_raises_error(sample_audio): - """Test that converting to AAC raises an error""" +def test_convert_to_aac(sample_audio): + """Test converting to AAC format""" audio_data, sample_rate = sample_audio - with pytest.raises( - ValueError, - match="Failed to convert audio to aac: Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm.", - ): - AudioService.convert_audio(audio_data, sample_rate, "aac") + result = AudioService.convert_audio(audio_data, sample_rate, "aac") + assert isinstance(result, bytes) + assert len(result) > 0 + # AAC files typically start with an ADTS header + assert result.startswith(b'\xff\xf1') or result.startswith(b'\xff\xf9') def test_convert_to_pcm(sample_audio): diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile index 5075a02..e4bd32c 100644 --- a/docker/cpu/Dockerfile +++ b/docker/cpu/Dockerfile @@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ git \ libsndfile1 \ curl \ + ffmpeg \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile index 9a5be8d..ed4676d 100644 --- a/docker/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ git \ libsndfile1 \ curl \ + ffmpeg \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/docker/shared/pyproject.toml b/docker/shared/pyproject.toml index bbff779..f45ff5e 100644 --- a/docker/shared/pyproject.toml +++ b/docker/shared/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "munch==4.0.0", "tiktoken==0.8.0", "loguru==0.7.3", + "pydub>=0.25.1", ] [project.optional-dependencies] diff --git a/pyproject.toml b/pyproject.toml index 7f91bce..8eb1632 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "openai>=1.59.6", "ebooklib>=0.18", "html2text>=2024.2.26", + "pydub>=0.25.1", ] [project.optional-dependencies]