2024-12-31 01:52:16 -07:00
|
|
|
"""Audio conversion service"""
|
2024-12-31 01:57:00 -07:00
|
|
|
|
2024-12-31 01:52:16 -07:00
|
|
|
from io import BytesIO
|
2024-12-31 02:55:51 -07:00
|
|
|
|
2024-12-31 01:52:16 -07:00
|
|
|
import numpy as np
|
|
|
|
import soundfile as sf
|
2024-12-31 02:55:51 -07:00
|
|
|
from loguru import logger
|
2024-12-31 01:52:16 -07:00
|
|
|
|
2024-12-31 01:57:00 -07:00
|
|
|
|
2025-01-04 17:54:54 -07:00
|
|
|
class AudioNormalizer:
    """Handles audio normalization state for a single stream.

    The only state kept is the int16 maximum used as the target peak; every
    call to ``normalize`` scales its input independently of earlier calls.
    """

    def __init__(self):
        self.int16_max = np.iinfo(np.int16).max

    def normalize(self, audio_data: np.ndarray) -> np.ndarray:
        """Normalize audio data to int16 range.

        The samples are peak-normalized: they are multiplied by a single
        factor so that the largest absolute sample maps to the int16 maximum,
        which preserves relative amplitudes within the array.

        Args:
            audio_data: Numpy array of audio samples (any numeric dtype).

        Returns:
            A new int16 array with the same shape as ``audio_data``. An empty
            input yields an empty int16 array and an all-zero input stays
            all-zero. Fractional values are truncated toward zero by the
            final integer cast.
        """
        # astype() always copies, so the in-place scaling below never touches
        # the caller's array. float64 keeps the scaling accurate.
        audio_float = audio_data.astype(np.float64)

        # An empty chunk has no peak: ndarray.max() raises ValueError on a
        # zero-size array, so return early with an empty result instead.
        if audio_float.size == 0:
            return audio_float.astype(np.int16)

        # Scale to int16 range while preserving relative amplitudes.
        # Silence (peak of 0) is left unscaled to avoid dividing by zero.
        max_val = np.abs(audio_float).max()
        if max_val > 0:
            audio_float *= self.int16_max / max_val

        # Clip to the symmetric int16 range and convert.
        return np.clip(audio_float, -self.int16_max, self.int16_max).astype(np.int16)
|
|
|
|
|
2024-12-31 01:52:16 -07:00
|
|
|
class AudioService:
    """Service for audio format conversions"""

    @staticmethod
    def convert_audio(
        audio_data: np.ndarray,
        sample_rate: int,
        output_format: str,
        is_first_chunk: bool = True,
        normalizer: AudioNormalizer = None
    ) -> bytes:
        """Convert audio data to specified format

        Args:
            audio_data: Numpy array of audio samples
            sample_rate: Sample rate of the audio
            output_format: Target format (wav, mp3, opus, flac, pcm)
            is_first_chunk: Whether this is the first chunk of a stream.
                Accepted for interface compatibility; this method does not
                currently consult it.
            normalizer: Optional AudioNormalizer used to scale the samples
                into the int16 range. When None, the samples are cast
                directly to int16 without any scaling.

        Returns:
            Bytes of the converted audio

        Raises:
            ValueError: If the format is not supported or the conversion
                fails. The underlying exception is chained as ``__cause__``.
        """
        buffer = BytesIO()

        try:
            # Normalize audio if normalizer provided, otherwise just convert to int16
            if normalizer is not None:
                normalized_audio = normalizer.normalize(audio_data)
            else:
                normalized_audio = audio_data.astype(np.int16)

            if output_format == "pcm":
                logger.info("Writing PCM data...")
                # Raw 16-bit PCM samples, no header
                buffer.write(normalized_audio.tobytes())
            elif output_format == "wav":
                logger.info("Writing to WAV format...")
                # Always include WAV header for WAV format
                sf.write(buffer, normalized_audio, sample_rate, format="WAV", subtype='PCM_16')
            elif output_format in ["mp3", "aac"]:
                logger.info(f"Converting to {output_format.upper()} format...")
                # No explicit subtype: 'COMPRESSED' is not a soundfile subtype
                # name and made every call fail, so let soundfile apply the
                # format's default subtype instead.
                # NOTE(review): whether MP3/AAC can actually be written depends
                # on the installed soundfile/libsndfile build - confirm; an
                # unsupported format still surfaces as ValueError below.
                sf.write(buffer, normalized_audio, sample_rate, format=output_format.upper())
            elif output_format == "opus":
                logger.info("Converting to Opus format...")
                # Opus stream inside an OGG container, encoder defaults
                sf.write(buffer, normalized_audio, sample_rate, format="OGG", subtype="OPUS")
            elif output_format == "flac":
                logger.info("Converting to FLAC format...")
                # 16-bit FLAC, encoder defaults
                sf.write(buffer, normalized_audio, sample_rate, format="FLAC",
                         subtype='PCM_16')
            else:
                raise ValueError(
                    f"Format {output_format} not supported. Supported formats are: wav, mp3, opus, flac, pcm."
                )

            # getvalue() returns the whole buffer regardless of the current
            # stream position, so no seek is needed first.
            return buffer.getvalue()

        except Exception as e:
            logger.error(f"Error converting audio to {output_format}: {str(e)}")
            # Chain the original exception so its traceback is not lost.
            raise ValueError(f"Failed to convert audio to {output_format}: {str(e)}") from e
|