Fix bug that generated an empty file when using streaming mode.

The cause is that when stream=True, the audio conversion functions were never actually called.
2025-08-05 16:48:53 +00:00 · 2025-03-09 14:12:18 +08:00 · 2025-03-09 14:12:18 +08:00 · f998cf8d01
commit f998cf8d01
parent 3e6ee65482
2 changed files with 22 additions and 6 deletions
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@ -166,7 +166,6 @@ async def stream_audio_chunks(
            normalization_options=request.normalization_options,
            return_timestamps=unique_properties["return_timestamps"],
        ):
-
            # Check if client is still connected
            is_disconnected = client_request.is_disconnected
            if callable(is_disconnected):
@ -273,9 +272,21 @@ async def create_speech(
            async def single_output():
                try:
                    # Stream chunks
+                    is_first_chunk=True
                    async for chunk_data in generator:
-                        if chunk_data.output:  # Skip empty chunks
-                            yield chunk_data.output
+                        if chunk_data.audio is not None and len(chunk_data.audio) > 0:  # Skip empty chunks
+                            # Convert to requested format with proper encoding
+                            encoded_chunk = await AudioService.convert_audio(
+                                chunk_data,
+                                24000,
+                                request.response_format,
+                                is_first_chunk=is_first_chunk,
+                                is_last_chunk=False,
+                                trim_audio=False
+                            )
+                            if encoded_chunk.output:
+                                yield encoded_chunk.output
+                            is_first_chunk=False
                except Exception as e:
                    logger.error(f"Error in single output streaming: {e}")
                    raise
--- a/api/src/services/streaming_audio_writer.py
+++ b/api/src/services/streaming_audio_writer.py
@ -64,9 +64,14 @@ class StreamingAudioWriter:
            frame.pts = self.pts
            self.pts += frame.samples
            
+            encoded_data = b""
            for packet in self.stream.encode(frame):
                self.container.mux(packet)
+                # Get the encoded data from the buffer
+                encoded_data = self.output_buffer.getvalue()
+                # Clear the buffer for next write
+                self.output_buffer.seek(0)
+                self.output_buffer.truncate(0)
            
-            # Only return a null byte and keep the container running.
-            return b""
+            return encoded_data