diff --git a/api/src/routers/development.py b/api/src/routers/development.py
index 6e09b73..7b6246f 100644
--- a/api/src/routers/development.py
+++ b/api/src/routers/development.py
@@ -210,12 +210,22 @@ async def create_captioned_speech(
                     try:
+                        # Buffers word timestamps from chunks that carry no audio until the next audio chunk is emitted.
+                        timestamp_accumulator = []
+
                         # Write chunks to temp file and stream
                         async for chunk_data in generator:
                             if chunk_data.output:  # Skip empty chunks
                                 await temp_writer.write(chunk_data.output)
                                 base64_chunk= base64.b64encode(chunk_data.output).decode("utf-8")
+                                # Prepend buffered timestamps; guard against word_timestamps being None before concatenating.
+                                chunk_data.word_timestamps = timestamp_accumulator + (chunk_data.word_timestamps or [])
+                                timestamp_accumulator = []
                             
                                 yield CaptionedSpeechResponse(audio=base64_chunk,audio_format=content_type,timestamps=chunk_data.word_timestamps)
-
+                            elif chunk_data.word_timestamps:
+                                # No audio in this chunk: keep its timestamps for the next chunk that has audio.
+                                timestamp_accumulator.extend(chunk_data.word_timestamps)
+                        # NOTE(review): timestamps still buffered when the generator ends are never emitted.
+
                         # Finalize the temp file
                         await temp_writer.finalize()
                     except Exception as e:
@@ -234,13 +244,24 @@ async def create_captioned_speech(
 
             async def single_output():
+                """Stream base64-encoded audio chunks together with their word timestamps."""
                 try:
+                    # Buffers word timestamps from chunks that carry no audio until the next audio chunk is emitted.
+                    timestamp_accumulator = []
+
                     # Stream chunks
                     async for chunk_data in generator:
                         if chunk_data.output:  # Skip empty chunks
                             # Encode the chunk bytes into base 64
                             base64_chunk= base64.b64encode(chunk_data.output).decode("utf-8")
                             
+                            # Prepend buffered timestamps; guard against word_timestamps being None before concatenating.
+                            chunk_data.word_timestamps = timestamp_accumulator + (chunk_data.word_timestamps or [])
+                            timestamp_accumulator = []
                             yield CaptionedSpeechResponse(audio=base64_chunk,audio_format=content_type,timestamps=chunk_data.word_timestamps)
+                        elif chunk_data.word_timestamps:
+                            # No audio in this chunk: keep its timestamps for the next chunk that has audio.
+                            timestamp_accumulator.extend(chunk_data.word_timestamps)
+                    # NOTE(review): timestamps still buffered when the generator ends are never emitted.
                 except Exception as e:
                     logger.error(f"Error in single output streaming: {e}")
                     raise