mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
more work on streaming timestamps (not working weird error) :(
This commit is contained in:
parent
91d370d97f
commit
6985f6ef99
5 changed files with 21 additions and 23 deletions
5
Test.py
5
Test.py
|
@ -12,10 +12,7 @@ response = requests.post(
|
|||
"input": "http://localhost:8880/web/",
|
||||
"voice": "af_heart",
|
||||
"response_format": "mp3", # Supported: mp3, wav, opus, flac
|
||||
"speed": 1.0,
|
||||
"normalization_options": {
|
||||
"normalize": True
|
||||
}
|
||||
"speed": 1.0
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
@ -291,17 +291,6 @@ class KokoroV1(BaseModelBackend):
|
|||
f"Added timestamp for word '{token.text}': {start_time:.3f}s - {end_time:.3f}s"
|
||||
)
|
||||
|
||||
# Update offset for next chunk based on pred_dur
|
||||
chunk_duration = (
|
||||
float(result.pred_dur.sum()) / 80
|
||||
) # Convert frames to seconds
|
||||
current_offset = max(
|
||||
current_offset + chunk_duration, end_time
|
||||
)
|
||||
logger.debug(
|
||||
f"Updated time offset to {current_offset:.3f}s"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to process timestamps for chunk: {e}"
|
||||
|
|
|
@ -8,7 +8,7 @@ import tempfile
|
|||
from typing import AsyncGenerator, Dict, List, Union, Tuple
|
||||
|
||||
import aiofiles
|
||||
from inference.base import AudioChunk
|
||||
from ..inference.base import AudioChunk
|
||||
import torch
|
||||
from fastapi import APIRouter, Depends, Header, HTTPException, Request, Response
|
||||
from fastapi.responses import FileResponse, StreamingResponse
|
||||
|
@ -214,15 +214,15 @@ async def create_speech(
|
|||
}
|
||||
|
||||
# Create async generator for streaming
|
||||
async def dual_output(return_json:bool=False):
|
||||
async def dual_output():
|
||||
try:
|
||||
# Write chunks to temp file and stream
|
||||
async for chunk, chunk_data in generator:
|
||||
if chunk: # Skip empty chunks
|
||||
await temp_writer.write(chunk)
|
||||
if return_json:
|
||||
yield chunk, chunk_data
|
||||
else:
|
||||
#if return_json:
|
||||
# yield chunk, chunk_data
|
||||
#else:
|
||||
yield chunk
|
||||
|
||||
# Finalize the temp file
|
||||
|
|
|
@ -328,10 +328,21 @@ class TTSService:
|
|||
) -> Tuple[Tuple[np.ndarray,AudioChunk]]:
|
||||
"""Generate complete audio for text using streaming internally."""
|
||||
start_time = time.time()
|
||||
chunks = []
|
||||
audio_chunks = []
|
||||
audio_data_chunks=[]
|
||||
word_timestamps = []
|
||||
|
||||
start_time = time.time()
|
||||
chunks = []
|
||||
word_timestamps = []
|
||||
try:
|
||||
async for audio_stream,audio_stream_data in self.generate_audio_stream(text,voice,speed=speed,return_timestamps=return_timestamps,lang_code=lang_code):
|
||||
print("common")
|
||||
audio_chunks.append(audio_stream_data.audio)
|
||||
audio_data_chunks.append(audio_stream_data)
|
||||
|
||||
print(audio_data_chunks)
|
||||
"""
|
||||
# Get backend and voice path
|
||||
backend = self.model_manager.get_backend()
|
||||
voice_name, voice_path = await self._get_voice_path(voice)
|
||||
|
@ -574,11 +585,12 @@ class TTSService:
|
|||
[],
|
||||
) # Empty timestamps for legacy backends
|
||||
return audio, processing_time
|
||||
|
||||
"""
|
||||
except Exception as e:
|
||||
logger.error(f"Error in audio generation: {str(e)}")
|
||||
raise
|
||||
|
||||
|
||||
async def combine_voices(self, voices: List[str]) -> torch.Tensor:
|
||||
"""Combine multiple voices.
|
||||
|
||||
|
|
BIN
output.mp3
BIN
output.mp3
Binary file not shown.
Loading…
Add table
Reference in a new issue