From 655c243f1d642b60caa5779cca0fb13770b6e4dd Mon Sep 17 00:00:00 2001 From: fondoger Date: Sat, 12 Apr 2025 22:08:05 +0800 Subject: [PATCH] Fix bug: same reference issue --- api/src/inference/base.py | 6 +++--- api/src/services/tts_service.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/src/inference/base.py b/api/src/inference/base.py index e25c2b5..fe70639 100644 --- a/api/src/inference/base.py +++ b/api/src/inference/base.py @@ -12,12 +12,12 @@ class AudioChunk: def __init__( self, - audio: np.ndarray, - word_timestamps: Optional[List] = [], + audio: np.ndarray, # dtype: np.int16 + word_timestamps: Optional[List] = None, # Using None instead of `[]` to avoid reference to the same empty list. output: Optional[Union[bytes, np.ndarray]] = b"", ): self.audio = audio - self.word_timestamps = word_timestamps + self.word_timestamps = word_timestamps if word_timestamps is not None else [] self.output = output @staticmethod diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py index 962d42d..7a8aaa3 100644 --- a/api/src/services/tts_service.py +++ b/api/src/services/tts_service.py @@ -65,7 +65,7 @@ class TTSService: # Handle silence tags, eg: `[silent](0.5s)` if match := SILENCE_TAG.match(chunk_text): silence_duration = float(match.group(1)) - silence_audio = np.zeros(int(silence_duration * 24000), dtype=np.float32) + silence_audio = np.zeros(int(silence_duration * 24000), dtype=np.int16) if not output_format: yield AudioChunk(silence_audio, output=b"") return @@ -89,7 +89,7 @@ class TTSService: return chunk_data = await AudioService.convert_audio( AudioChunk( - np.array([], dtype=np.float32) + np.array([], dtype=np.int16) ), # Dummy data for type checking output_format, writer,