mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-08-05 16:48:53 +00:00
Update audio.py
This commit is contained in:
parent
7938de0f4a
commit
94b6fc22ea
1 changed file with 16 additions and 31 deletions
|
@ -4,7 +4,6 @@ from io import BytesIO
|
|||
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import scipy.io.wavfile as wavfile
|
||||
from loguru import logger
|
||||
|
||||
|
||||
|
@ -20,7 +19,7 @@ class AudioService:
|
|||
Args:
|
||||
audio_data: Numpy array of audio samples
|
||||
sample_rate: Sample rate of the audio
|
||||
output_format: Target format (wav, mp3, etc.)
|
||||
output_format: Target format (wav, mp3, opus, flac, pcm)
|
||||
|
||||
Returns:
|
||||
Bytes of the converted audio
|
||||
|
@ -30,46 +29,32 @@ class AudioService:
|
|||
try:
|
||||
if output_format == "wav":
|
||||
logger.info("Writing to WAV format...")
|
||||
wavfile.write(buffer, sample_rate, audio_data)
|
||||
return buffer.getvalue()
|
||||
|
||||
# Ensure audio_data is in int16 format for WAV
|
||||
audio_data_wav = audio_data.astype(np.int16)
|
||||
sf.write(buffer, audio_data_wav, sample_rate, format="WAV")
|
||||
elif output_format == "mp3":
|
||||
# For MP3, we need to convert to WAV first
|
||||
logger.info("Converting to MP3 format...")
|
||||
wav_buffer = BytesIO()
|
||||
wavfile.write(wav_buffer, sample_rate, audio_data)
|
||||
wav_buffer.seek(0)
|
||||
|
||||
# Convert WAV to MP3 using soundfile
|
||||
buffer = BytesIO()
|
||||
sf.write(buffer, audio_data, sample_rate, format="mp3")
|
||||
return buffer.getvalue()
|
||||
|
||||
# soundfile can write MP3 only when the underlying libsndfile was built with MP3 support (libsndfile >= 1.1.0)
|
||||
sf.write(buffer, audio_data, sample_rate, format="MP3")
|
||||
elif output_format == "opus":
|
||||
logger.info("Converting to Opus format...")
|
||||
sf.write(buffer, audio_data, sample_rate, format="ogg", subtype="opus")
|
||||
return buffer.getvalue()
|
||||
|
||||
sf.write(buffer, audio_data, sample_rate, format="OGG", subtype="OPUS")
|
||||
elif output_format == "flac":
|
||||
logger.info("Converting to FLAC format...")
|
||||
sf.write(buffer, audio_data, sample_rate, format="flac")
|
||||
return buffer.getvalue()
|
||||
|
||||
elif output_format == "aac":
|
||||
raise ValueError(
|
||||
"AAC format is not currently supported. Please use wav, mp3, opus, or flac."
|
||||
)
|
||||
|
||||
sf.write(buffer, audio_data, sample_rate, format="FLAC")
|
||||
elif output_format == "pcm":
|
||||
raise ValueError(
|
||||
"PCM format is not currently supported. Please use wav, mp3, opus, or flac."
|
||||
)
|
||||
|
||||
logger.info("Extracting PCM data...")
|
||||
# Ensure audio_data is in int16 format for PCM
|
||||
audio_data_pcm = audio_data.astype(np.int16)
|
||||
buffer.write(audio_data_pcm.tobytes())
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Format {output_format} not supported. Supported formats are: wav, mp3, opus, flac."
|
||||
f"Format {output_format} not supported. Supported formats are: wav, mp3, opus, flac, pcm."
|
||||
)
|
||||
|
||||
buffer.seek(0)
|
||||
return buffer.getvalue()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error converting audio to {output_format}: {str(e)}")
|
||||
raise ValueError(f"Failed to convert audio to {output_format}: {str(e)}")
|
||||
|
|
Loading…
Add table
Reference in a new issue