add AAC audio format and test

2025-08-05 16:48:53 +00:00 · 2025-01-17 21:43:10 -07:00 · 2025-01-17 21:43:10 -07:00 · d51d861861
commit d51d861861
parent 57f3cf6338
6 changed files with 31 additions and 10 deletions
--- a/api/src/services/audio.py
+++ b/api/src/services/audio.py
@@ -6,6 +6,7 @@ import numpy as np
 import scipy.io.wavfile as wavfile
 import soundfile as sf
 from loguru import logger
+from pydub import AudioSegment

 from ..core.config import settings

@@ -52,6 +53,9 @@ class AudioService:
        "flac": {
            "compression_level": 0.0,  # Light compression, still fast
        },
+        "aac": {
+            "bitrate": "192k",  # Default AAC bitrate
+        },
    }

    @staticmethod
@@ -144,9 +148,22 @@ class AudioService:
                    subtype="PCM_16",
                    **settings,
                )
-            elif output_format == "aac":
-                raise ValueError(
-                    "Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm."
+            elif output_format == "aac":           
+                # Convert numpy array directly to AAC using pydub
+                audio_segment = AudioSegment(
+                    normalized_audio.tobytes(), 
+                    frame_rate=sample_rate,
+                    sample_width=normalized_audio.dtype.itemsize,
+                    channels=1 if len(normalized_audio.shape) == 1 else normalized_audio.shape[1]
+                )
+                
+                settings = format_settings.get("aac", {}) if format_settings else {}
+                settings = {**AudioService.DEFAULT_SETTINGS["aac"], **settings}
+                
+                audio_segment.export(
+                    buffer,
+                    format="adts",  # ADTS is a common AAC container format
+                    bitrate=settings["bitrate"]
                )
            else:
                raise ValueError(
--- a/api/tests/test_audio_service.py
+++ b/api/tests/test_audio_service.py
@@ -58,14 +58,14 @@ def test_convert_to_flac(sample_audio):
    assert len(result) > 0


-def test_convert_to_aac_raises_error(sample_audio):
-    """Test that converting to AAC raises an error"""
+def test_convert_to_aac(sample_audio):
+    """Test converting to AAC format"""
    audio_data, sample_rate = sample_audio
-    with pytest.raises(
-        ValueError,
-        match="Failed to convert audio to aac: Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm.",
-    ):
-        AudioService.convert_audio(audio_data, sample_rate, "aac")
+    result = AudioService.convert_audio(audio_data, sample_rate, "aac")
+    assert isinstance(result, bytes)
+    assert len(result) > 0
+    # ADTS frames begin with the 0xFFF syncword; 0xFFF1 = MPEG-4, 0xFFF9 = MPEG-2 (both without CRC)
+    assert result.startswith(b'\xff\xf1') or result.startswith(b'\xff\xf9')


 def test_convert_to_pcm(sample_audio):
--- a/docker/cpu/Dockerfile
+++ b/docker/cpu/Dockerfile
@@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    libsndfile1 \
    curl \
+    ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

--- a/docker/gpu/Dockerfile
+++ b/docker/gpu/Dockerfile
@@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    libsndfile1 \
    curl \
+    ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

--- a/docker/shared/pyproject.toml
+++ b/docker/shared/pyproject.toml
@@ -33,6 +33,7 @@ dependencies = [
    "munch==4.0.0",
    "tiktoken==0.8.0",
    "loguru==0.7.3",
+    "pydub>=0.25.1",
 ]

 [project.optional-dependencies]
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,7 @@ dependencies = [
    "openai>=1.59.6",
    "ebooklib>=0.18",
    "html2text>=2024.2.26",
+    "pydub>=0.25.1",
 ]

 [project.optional-dependencies]