From 2dc9b81ad5713a79200e9110bf0fb960f63f60dc Mon Sep 17 00:00:00 2001 From: CodePothunter Date: Fri, 7 Mar 2025 20:17:35 +0800 Subject: [PATCH] Fix audio chunk concatenation and dtype conversion conflict - Modify audio chunk concatenation to handle float32 audio data - Add explicit conversion from float32 to int16 using amplitude scaling - Remove unnecessary dtype specification in np.concatenate --- api/src/inference/base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/api/src/inference/base.py b/api/src/inference/base.py index 6b59fa9..506e700 100644 --- a/api/src/inference/base.py +++ b/api/src/inference/base.py @@ -23,9 +23,14 @@ class AudioChunk: output=AudioChunk(audio_chunk_list[0].audio,audio_chunk_list[0].word_timestamps) for audio_chunk in audio_chunk_list[1:]: - output.audio=np.concatenate((output.audio,audio_chunk.audio),dtype=np.int16) + output.audio=np.concatenate((output.audio, audio_chunk.audio)) if output.word_timestamps is not None: output.word_timestamps+=audio_chunk.word_timestamps + + # 从float32转换为int16,使用音频的方式 + if output.audio.dtype == np.float32: + output.audio = (output.audio * 32767).astype(np.int16) + return output