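Fixed the final-chunk return in AudioService not returning enough values (it now returns the data together with the audio chunk, like the regular-chunk return)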

This commit is contained in:
Fireblade2534 2025-02-12 15:06:11 +00:00
parent 5cc9d140fe
commit 51b6b01589
3 changed files with 16 additions and 18 deletions

View file

@ -11,7 +11,7 @@ class AudioChunk:
def __init__(self,
             audio: np.ndarray,
             word_timestamps: Optional[List] = None
             ):
    """Store one chunk of audio together with its word timestamps.

    Args:
        audio: Audio samples for this chunk.
        word_timestamps: Timestamp entries belonging to this chunk. When
            omitted (or ``None``) a new empty list is created for this
            instance.
    """
    self.audio = audio
    # A literal ``[]`` default is evaluated once at definition time and would
    # be shared by every chunk built without timestamps; since timestamp
    # entries are edited in place elsewhere, each instance needs its own list.
    self.word_timestamps = [] if word_timestamps is None else word_timestamps

View file

@ -57,7 +57,6 @@ class AudioNormalizer:
non_silent_index_start, non_silent_index_end = None,None
for X in range(0,len(audio_data)):
#print(audio_data[X])
if audio_data[X] > amplitude_threshold:
non_silent_index_start=X
break
@ -149,11 +148,9 @@ class AudioService:
if normalizer is None:
normalizer = AudioNormalizer()
print("1")
audio_chunk.audio = await normalizer.normalize(audio_chunk.audio)
print("2")
audio_chunk = AudioService.trim_audio(audio_chunk,chunk_text,speed,is_last_chunk,normalizer)
print("3")
# Get or create format-specific writer
writer_key = f"{output_format}_{sample_rate}"
if is_first_chunk or writer_key not in AudioService._writers:
@ -161,18 +158,16 @@ class AudioService:
output_format, sample_rate
)
writer = AudioService._writers[writer_key]
print("4")
# Write audio data first
if len(audio_chunk.audio) > 0:
chunk_data = writer.write_chunk(audio_chunk.audio)
print("5")
# Then finalize if this is the last chunk
if is_last_chunk:
print("6")
final_data = writer.write_chunk(finalize=True)
print("7")
del AudioService._writers[writer_key]
return final_data if final_data else b""
return final_data if final_data else b"", audio_chunk
return chunk_data if chunk_data else b"", audio_chunk
@ -206,8 +201,10 @@ class AudioService:
start_index,end_index=normalizer.find_first_last_non_silent(audio_chunk.audio,chunk_text,speed,is_last_chunk=is_last_chunk)
audio_chunk.audio=audio_chunk.audio[start_index:end_index]
for timestamp in audio_chunk.word_timestamps:
timestamp["start_time"]-=start_index * 24000
timestamp["end_time"]-=start_index * 24000
if audio_chunk.word_timestamps is not None:
for timestamp in audio_chunk.word_timestamps:
timestamp["start_time"]-=start_index / 24000
timestamp["end_time"]-=start_index / 24000
return audio_chunk

View file

@ -6,6 +6,7 @@ import tempfile
import time
from typing import AsyncGenerator, List, Optional, Tuple, Union
from ..inference.base import AudioChunk
import numpy as np
import torch
from kokoro import KPipeline
@ -62,9 +63,8 @@ class TTSService:
if not output_format:
yield np.array([], dtype=np.float32)
return
result = await AudioService.convert_audio(
np.array([0], dtype=np.float32), # Dummy data for type checking
result, _ = await AudioService.convert_audio(
AudioChunk(np.array([0], dtype=np.float32)), # Dummy data for type checking
24000,
output_format,
speed,
@ -119,7 +119,7 @@ class TTSService:
print(chunk_data.word_timestamps)
yield chunk_data.audio
else:
print("old backend")
# For legacy backends, load voice tensor
voice_tensor = await self._voice_manager.load_voice(
voice_name, device=backend.device
@ -315,7 +315,8 @@ class TTSService:
except Exception as e:
logger.error(f"Error in phoneme audio generation: {str(e)}")
raise
raise e
async def generate_audio(
self,