mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
Fix truncated playback issue in streaming WAV responses.
This commit is contained in:
parent
55ce88bfb6
commit
b8d592081e
1 changed file with 25 additions and 8 deletions
|
@@ -1,6 +1,7 @@
|
||||||
"""Audio conversion service"""
|
"""Audio conversion service"""
|
||||||
|
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
import struct
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.io.wavfile as wavfile
|
import scipy.io.wavfile as wavfile
|
||||||
|
@@ -107,14 +108,30 @@ class AudioService:
|
||||||
# Raw 16-bit PCM samples, no header
|
# Raw 16-bit PCM samples, no header
|
||||||
buffer.write(normalized_audio.tobytes())
|
buffer.write(normalized_audio.tobytes())
|
||||||
elif output_format == "wav":
|
elif output_format == "wav":
|
||||||
# WAV format with headers
|
# Write the WAV header ourselves so that we can specify a "fake" data size.
|
||||||
sf.write(
|
# This is necessary for streaming responses to work properly: if we simply
|
||||||
buffer,
|
# concatenated individual WAV files then the initial chunk's header length
|
||||||
normalized_audio,
|
# would be shorter than the full file length and subsequent chunks' RIFF
|
||||||
sample_rate,
|
# headers would appear in the middle of the audio data.
|
||||||
format="WAV",
|
if is_first_chunk:
|
||||||
subtype="PCM_16",
|
# Modified from Python stdlib's wave.py module:
|
||||||
)
|
buffer.write(b'RIFF')
|
||||||
|
buffer.write(struct.pack('<L4s4sLHHLLHH4s',
|
||||||
|
0xFFFFFFFF, # total size (set to max)
|
||||||
|
b'WAVE',
|
||||||
|
b'fmt ',
|
||||||
|
16,
|
||||||
|
1, # PCM format
|
||||||
|
1, # channels
|
||||||
|
sample_rate,
|
||||||
|
sample_rate * 2, # byte rate
|
||||||
|
2, # block align
|
||||||
|
16, # bits per sample
|
||||||
|
b'data'
|
||||||
|
))
|
||||||
|
buffer.write(struct.pack('<L', 0xFFFFFFFF)) # data size (set to max)
|
||||||
|
# write raw PCM data
|
||||||
|
buffer.write(normalized_audio.tobytes())
|
||||||
elif output_format == "mp3":
|
elif output_format == "mp3":
|
||||||
# MP3 format with proper framing
|
# MP3 format with proper framing
|
||||||
settings = format_settings.get("mp3", {}) if format_settings else {}
|
settings = format_settings.get("mp3", {}) if format_settings else {}
|
||||||
|
|
Loading…
Add table
Reference in a new issue