diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py
index 84f4f79..3361ade 100644
--- a/api/src/inference/kokoro_v1.py
+++ b/api/src/inference/kokoro_v1.py
@@ -295,7 +295,8 @@ class KokoroV1(BaseModelBackend):
                             logger.error(
                                 f"Failed to process timestamps for chunk: {e}"
                             )
-
+
+                    yield AudioChunk(result.audio.numpy(),word_timestamps=word_timestamps)
                 else:
                     logger.warning("No audio in chunk")
 
diff --git a/api/src/routers/openai_compatible.py b/api/src/routers/openai_compatible.py
index e285234..0eaa266 100644
--- a/api/src/routers/openai_compatible.py
+++ b/api/src/routers/openai_compatible.py
@@ -128,7 +128,7 @@ async def process_voices(
 
 async def stream_audio_chunks(
     tts_service: TTSService, request: OpenAISpeechRequest, client_request: Request
-) -> AsyncGenerator[Tuple[bytes,AudioChunk], None]:
+) -> AsyncGenerator[list, None]:
     """Stream audio chunks as they're generated with client disconnect handling"""
     voice_name = await process_voices(request.voice, tts_service)
 
@@ -140,8 +140,10 @@ async def stream_audio_chunks(
             speed=request.speed,
             output_format=request.response_format,
             lang_code=request.lang_code or request.voice[0],
-            normalization_options=request.normalization_options
+            normalization_options=request.normalization_options,
+            return_timestamps=True,
         ):
+
             # Check if client is still connected
             is_disconnected = client_request.is_disconnected
             if callable(is_disconnected):
@@ -149,7 +151,8 @@
             if is_disconnected:
                 logger.info("Client disconnected, stopping audio generation")
                 break
-            yield chunk, chunk_data
+
+            yield chunk
     except Exception as e:
         logger.error(f"Error in audio streaming: {str(e)}")
         # Let the exception propagate to trigger cleanup
@@ -158,6 +161,7 @@
 
 @router.post("/audio/speech")
 async def create_speech(
+
     request: OpenAISpeechRequest,
     client_request: Request,
     x_raw_response: str = Header(None, alias="x-raw-response"),
@@ -217,7 +221,7 @@ async def create_speech(
         async def dual_output():
             try:
                 # Write chunks to temp file and stream
-                async for chunk, chunk_data in generator:
+                async for chunk in generator:
                     if chunk:  # Skip empty chunks
                         await temp_writer.write(chunk)
                         #if return_json:
diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py
index 470523e..172a133 100644
--- a/api/src/services/tts_service.py
+++ b/api/src/services/tts_service.py
@@ -247,7 +247,7 @@ class TTSService:
         """Generate and stream audio chunks."""
         stream_normalizer = AudioNormalizer()
         chunk_index = 0
-
+        current_offset=0.0
         try:
             # Get backend
             backend = self.model_manager.get_backend()
@@ -261,7 +261,8 @@
                 logger.info(
                     f"Using lang_code '{pipeline_lang_code}' for voice '{voice_name}' in audio stream"
                 )
-
+
+
             # Process text in chunks with smart splitting
             async for chunk_text, tokens in smart_split(text,normalization_options=normalization_options):
                 try:
@@ -277,8 +278,17 @@
                         is_last=False,  # We'll update the last chunk later
                         normalizer=stream_normalizer,
                         lang_code=pipeline_lang_code,  # Pass lang_code
+                        return_timestamps=return_timestamps,
                     ):
+                        if chunk_data.word_timestamps is not None:
+                            for timestamp in chunk_data.word_timestamps:
+                                timestamp["start_time"]+=current_offset
+                                timestamp["end_time"]+=current_offset
+
+                        current_offset+=len(chunk_data.audio) / 24000
+
                         if result is not None:
+                            print(chunk_data.word_timestamps)
                             yield result,chunk_data
                             chunk_index += 1
                         else:
diff --git a/output.mp3 b/output.mp3
index 580f1b9..1ecd8a3 100644
Binary files a/output.mp3 and b/output.mp3 differ
diff --git a/peaks/output.mp3.reapeaks b/peaks/output.mp3.reapeaks
new file mode 100644
index 0000000..46e2ba3
Binary files /dev/null and b/peaks/output.mp3.reapeaks differ
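
Note on the tts_service.py hunk above: inference yields word timestamps relative to the start of each audio chunk, so the new current_offset bookkeeping rebases them onto the whole-stream timeline, advancing the offset by each chunk's duration (sample count / 24000 Hz). Below is a minimal standalone sketch of that arithmetic; Chunk and rebase_timestamps are hypothetical stand-ins, not names from the codebase, and only the offset logic mirrors the diff.

# rebase_sketch.py -- illustrative only; Chunk and rebase_timestamps are
# hypothetical names, not part of the Kokoro-FastAPI codebase.
from dataclasses import dataclass, field
from typing import Dict, List

SAMPLE_RATE = 24000  # Kokoro emits 24 kHz audio, matching the /24000 in the diff

@dataclass
class Chunk:
    audio: List[float]  # raw samples for one chunk
    word_timestamps: List[Dict] = field(default_factory=list)  # chunk-relative seconds

def rebase_timestamps(chunks: List[Chunk]) -> List[Dict]:
    """Shift per-chunk word timestamps onto the whole-stream timeline."""
    current_offset = 0.0
    merged: List[Dict] = []
    for chunk in chunks:
        for ts in chunk.word_timestamps:
            # Same in-place mutation as the diff: add the running offset
            ts["start_time"] += current_offset
            ts["end_time"] += current_offset
            merged.append(ts)
        # Advance by this chunk's duration: samples / sample rate
        current_offset += len(chunk.audio) / SAMPLE_RATE
    return merged

# Two 1-second chunks: the second chunk's word lands at 1.10-1.40 s overall.
chunks = [
    Chunk([0.0] * 24000, [{"word": "hello", "start_time": 0.05, "end_time": 0.40}]),
    Chunk([0.0] * 24000, [{"word": "world", "start_time": 0.10, "end_time": 0.40}]),
]
print(rebase_timestamps(chunks))  # world: start_time 1.10, end_time 1.40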