Kokoro-FastAPI/api/src/services/audio.py

76 lines
2.5 KiB
Python
Raw Normal View History

"""Audio conversion service"""
from io import BytesIO
import numpy as np
import soundfile as sf
import scipy.io.wavfile as wavfile
from loguru import logger
class AudioService:
    """Service for audio format conversions."""

    @staticmethod
    def convert_audio(
        audio_data: np.ndarray, sample_rate: int, output_format: str
    ) -> bytes:
        """Encode audio samples into the requested container/codec.

        WAV output is produced with ``scipy.io.wavfile.write``; MP3, Opus
        (in an Ogg container) and FLAC are produced with ``soundfile.write``.

        Args:
            audio_data: Numpy array of audio samples.
            sample_rate: Sample rate of the audio.
            output_format: Target format. One of ``"wav"``, ``"mp3"``,
                ``"opus"`` or ``"flac"`` (matched case-sensitively).

        Returns:
            Bytes of the converted audio.

        Raises:
            ValueError: If ``output_format`` is not supported, or if the
                underlying encoder fails (the original exception is
                attached as ``__cause__``).
        """
        # Reject unsupported formats before entering the try block so these
        # messages reach the caller as written, instead of being caught by
        # the generic handler below and re-wrapped as a conversion failure.
        if output_format == "aac":
            raise ValueError(
                "AAC format is not currently supported. Please use wav, mp3, opus, or flac."
            )
        if output_format == "pcm":
            raise ValueError(
                "PCM format is not currently supported. Please use wav, mp3, opus, or flac."
            )
        if output_format not in ("wav", "mp3", "opus", "flac"):
            raise ValueError(
                f"Format {output_format} not supported. Supported formats are: wav, mp3, opus, flac."
            )

        buffer = BytesIO()
        try:
            if output_format == "wav":
                logger.info("Writing to WAV format...")
                wavfile.write(buffer, sample_rate, audio_data)
            elif output_format == "mp3":
                # soundfile encodes directly from the sample array, so no
                # intermediate WAV needs to be rendered first.
                logger.info("Converting to MP3 format...")
                sf.write(buffer, audio_data, sample_rate, format="mp3")
            elif output_format == "opus":
                logger.info("Converting to Opus format...")
                sf.write(buffer, audio_data, sample_rate, format="ogg", subtype="opus")
            else:
                # Only "flac" remains after the validation above.
                logger.info("Converting to FLAC format...")
                sf.write(buffer, audio_data, sample_rate, format="flac")
            return buffer.getvalue()
        except Exception as e:
            # Normalise any encoder failure to ValueError (the type callers
            # of this method already receive), keeping the cause chained.
            logger.error(f"Error converting audio to {output_format}: {str(e)}")
            raise ValueError(
                f"Failed to convert audio to {output_format}: {str(e)}"
            ) from e