Fix bug that generated an empty file when using streaming mode.

The cause was that, when stream=True, the audio conversion functions were never actually called.
2025-08-05 16:48:53 +00:00 · 2025-03-09 14:12:18 +08:00 · 2025-03-09 14:12:18 +08:00 · f998cf8d01
commit f998cf8d01
parent 3e6ee65482
2 changed files with 22 additions and 6 deletions
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@ -166,7 +166,6 @@ async def stream_audio_chunks(
            normalization_options=request.normalization_options,
            return_timestamps=unique_properties["return_timestamps"],
        ):
            # Check if client is still connected
            is_disconnected = client_request.is_disconnected
            if callable(is_disconnected):
@ -174,7 +173,7 @@ async def stream_audio_chunks(
            if is_disconnected:
                logger.info("Client disconnected, stopping audio generation")
                break
-
+            
            yield chunk_data
    except Exception as e:
        logger.error(f"Error in audio streaming: {str(e)}")
@ -273,9 +272,21 @@ async def create_speech(
            async def single_output():
                try:
                    # Stream chunks
                    is_first_chunk=True
                    async for chunk_data in generator:
-                        if chunk_data.output:  # Skip empty chunks
+                        if chunk_data.audio is not None and len(chunk_data.audio) > 0:  # Skip empty chunks
-                            yield chunk_data.output
+                            # Convert to requested format with proper encoding
                            encoded_chunk = await AudioService.convert_audio(
                                chunk_data,
                                24000,
                                request.response_format,
                                is_first_chunk=is_first_chunk,
                                is_last_chunk=False,
                                trim_audio=False
                            )
                            if encoded_chunk.output:
                                yield encoded_chunk.output
                            is_first_chunk=False
                except Exception as e:
                    logger.error(f"Error in single output streaming: {e}")
                    raise
--- a/api/src/services/streaming_audio_writer.py
+++ b/api/src/services/streaming_audio_writer.py
@ -64,9 +64,14 @@ class StreamingAudioWriter:
            frame.pts = self.pts
            self.pts += frame.samples
            encoded_data = b""
            for packet in self.stream.encode(frame):
                self.container.mux(packet)
                # Get the encoded data from the buffer
                encoded_data = self.output_buffer.getvalue()
                # Clear the buffer for next write
                self.output_buffer.seek(0)
                self.output_buffer.truncate(0)
-            # Only return a null byte and keep the container running.
+            return encoded_data
            return b""