More work on streaming timestamps (not working yet — hitting a weird error) :(

This commit is contained in:
Fireblade2534 2025-02-12 20:34:55 +00:00
parent 91d370d97f
commit 6985f6ef99
5 changed files with 21 additions and 23 deletions

View file

@@ -12,10 +12,7 @@ response = requests.post(
"input": "http://localhost:8880/web/", "input": "http://localhost:8880/web/",
"voice": "af_heart", "voice": "af_heart",
"response_format": "mp3", # Supported: mp3, wav, opus, flac "response_format": "mp3", # Supported: mp3, wav, opus, flac
"speed": 1.0, "speed": 1.0
"normalization_options": {
"normalize": True
}
} }
) )

View file

@@ -291,17 +291,6 @@ class KokoroV1(BaseModelBackend):
f"Added timestamp for word '{token.text}': {start_time:.3f}s - {end_time:.3f}s" f"Added timestamp for word '{token.text}': {start_time:.3f}s - {end_time:.3f}s"
) )
# Update offset for next chunk based on pred_dur
chunk_duration = (
float(result.pred_dur.sum()) / 80
) # Convert frames to seconds
current_offset = max(
current_offset + chunk_duration, end_time
)
logger.debug(
f"Updated time offset to {current_offset:.3f}s"
)
except Exception as e: except Exception as e:
logger.error( logger.error(
f"Failed to process timestamps for chunk: {e}" f"Failed to process timestamps for chunk: {e}"

View file

@@ -8,7 +8,7 @@ import tempfile
from typing import AsyncGenerator, Dict, List, Union, Tuple from typing import AsyncGenerator, Dict, List, Union, Tuple
import aiofiles import aiofiles
from inference.base import AudioChunk from ..inference.base import AudioChunk
import torch import torch
from fastapi import APIRouter, Depends, Header, HTTPException, Request, Response from fastapi import APIRouter, Depends, Header, HTTPException, Request, Response
from fastapi.responses import FileResponse, StreamingResponse from fastapi.responses import FileResponse, StreamingResponse
@@ -214,16 +214,16 @@ async def create_speech(
} }
# Create async generator for streaming # Create async generator for streaming
async def dual_output(return_json:bool=False): async def dual_output():
try: try:
# Write chunks to temp file and stream # Write chunks to temp file and stream
async for chunk, chunk_data in generator: async for chunk, chunk_data in generator:
if chunk: # Skip empty chunks if chunk: # Skip empty chunks
await temp_writer.write(chunk) await temp_writer.write(chunk)
if return_json: #if return_json:
yield chunk, chunk_data # yield chunk, chunk_data
else: #else:
yield chunk yield chunk
# Finalize the temp file # Finalize the temp file
await temp_writer.finalize() await temp_writer.finalize()

View file

@@ -327,11 +327,22 @@ class TTSService:
lang_code: Optional[str] = None, lang_code: Optional[str] = None,
) -> Tuple[Tuple[np.ndarray,AudioChunk]]: ) -> Tuple[Tuple[np.ndarray,AudioChunk]]:
"""Generate complete audio for text using streaming internally.""" """Generate complete audio for text using streaming internally."""
start_time = time.time()
audio_chunks = []
audio_data_chunks=[]
word_timestamps = []
start_time = time.time() start_time = time.time()
chunks = [] chunks = []
word_timestamps = [] word_timestamps = []
try: try:
async for audio_stream,audio_stream_data in self.generate_audio_stream(text,voice,speed=speed,return_timestamps=return_timestamps,lang_code=lang_code):
print("common")
audio_chunks.append(audio_stream_data.audio)
audio_data_chunks.append(audio_stream_data)
print(audio_data_chunks)
"""
# Get backend and voice path # Get backend and voice path
backend = self.model_manager.get_backend() backend = self.model_manager.get_backend()
voice_name, voice_path = await self._get_voice_path(voice) voice_name, voice_path = await self._get_voice_path(voice)
@@ -574,10 +585,11 @@ class TTSService:
[], [],
) # Empty timestamps for legacy backends ) # Empty timestamps for legacy backends
return audio, processing_time return audio, processing_time
"""
except Exception as e: except Exception as e:
logger.error(f"Error in audio generation: {str(e)}") logger.error(f"Error in audio generation: {str(e)}")
raise raise
async def combine_voices(self, voices: List[str]) -> torch.Tensor: async def combine_voices(self, voices: List[str]) -> torch.Tensor:
"""Combine multiple voices. """Combine multiple voices.

Binary file not shown.