From d51d861861958fd383cb4ced672be4c0134d6639 Mon Sep 17 00:00:00 2001 From: Richard Roberson Date: Fri, 17 Jan 2025 21:43:10 -0700 Subject: [PATCH] add AAC audio format and test --- api/src/services/audio.py | 23 ++++++++++++++++++++--- api/tests/test_audio_service.py | 14 +++++++------- docker/cpu/Dockerfile | 1 + docker/gpu/Dockerfile | 1 + docker/shared/pyproject.toml | 1 + pyproject.toml | 1 + 6 files changed, 31 insertions(+), 10 deletions(-) diff --git a/api/src/services/audio.py b/api/src/services/audio.py index 4e8c215..4a45608 100644 --- a/api/src/services/audio.py +++ b/api/src/services/audio.py @@ -6,6 +6,7 @@ import numpy as np import scipy.io.wavfile as wavfile import soundfile as sf from loguru import logger +from pydub import AudioSegment from ..core.config import settings @@ -52,6 +53,9 @@ class AudioService: "flac": { "compression_level": 0.0, # Light compression, still fast }, + "aac": { + "bitrate": "192k", # Default AAC bitrate + }, } @staticmethod @@ -144,9 +148,22 @@ class AudioService: subtype="PCM_16", **settings, ) - elif output_format == "aac": - raise ValueError( - "Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm." + elif output_format == "aac": + # Convert numpy array directly to AAC using pydub + audio_segment = AudioSegment( + normalized_audio.tobytes(), + frame_rate=sample_rate, + sample_width=normalized_audio.dtype.itemsize, + channels=1 if len(normalized_audio.shape) == 1 else normalized_audio.shape[1] + ) + + settings = format_settings.get("aac", {}) if format_settings else {} + settings = {**AudioService.DEFAULT_SETTINGS["aac"], **settings} + + audio_segment.export( + buffer, + format="adts", # ADTS is a common AAC container format + bitrate=settings["bitrate"] ) else: raise ValueError( diff --git a/api/tests/test_audio_service.py b/api/tests/test_audio_service.py index bb6fb36..8131c9f 100644 --- a/api/tests/test_audio_service.py +++ b/api/tests/test_audio_service.py @@ -58,14 +58,14 @@ def test_convert_to_flac(sample_audio): assert len(result) > 0 -def test_convert_to_aac_raises_error(sample_audio): - """Test that converting to AAC raises an error""" +def test_convert_to_aac(sample_audio): + """Test converting to AAC format""" audio_data, sample_rate = sample_audio - with pytest.raises( - ValueError, - match="Failed to convert audio to aac: Format aac not currently supported. Supported formats are: wav, mp3, opus, flac, pcm.", - ): - AudioService.convert_audio(audio_data, sample_rate, "aac") + result = AudioService.convert_audio(audio_data, sample_rate, "aac") + assert isinstance(result, bytes) + assert len(result) > 0 + # AAC files typically start with an ADTS header + assert result.startswith(b'\xff\xf1') or result.startswith(b'\xff\xf9') def test_convert_to_pcm(sample_audio): diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile index 5075a02..e4bd32c 100644 --- a/docker/cpu/Dockerfile +++ b/docker/cpu/Dockerfile @@ -6,6 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ git \ libsndfile1 \ curl \ + ffmpeg \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile index 9a5be8d..ed4676d 100644 --- a/docker/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ git \ libsndfile1 \ curl \ + ffmpeg \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/docker/shared/pyproject.toml b/docker/shared/pyproject.toml index bbff779..f45ff5e 100644 --- a/docker/shared/pyproject.toml +++ b/docker/shared/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "munch==4.0.0", "tiktoken==0.8.0", "loguru==0.7.3", + "pydub>=0.25.1", ] [project.optional-dependencies] diff --git a/pyproject.toml b/pyproject.toml index 7f91bce..8eb1632 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "openai>=1.59.6", "ebooklib>=0.18", "html2text>=2024.2.26", + "pydub>=0.25.1", ] [project.optional-dependencies]