Fixes and corrections to code that didn't cause errors but didn't really make sense.

This commit is contained in:
Fireblade 2025-03-02 21:36:34 -05:00
parent 9c6e72943c
commit b3d5f4de08
4 changed files with 9 additions and 10 deletions

View file

@ -23,7 +23,7 @@ In conclusion, "Jet Black Heart" by 5 Seconds of Summer is far more than a typic
5 Seconds of Summer, initially perceived as purveyors of upbeat, radio-friendly pop-punk, embarked on a significant artistic evolution with their album Sounds Good Feels Good. Among its tracks, "Jet Black Heart" stands out as a powerful testament to this shift, moving beyond catchy melodies and embracing a darker, more emotionally complex sound. Released in 2015, the song transcends the typical themes of youthful exuberance and romantic angst, instead plunging into the depths of personal turmoil and the corrosive effects of inner darkness on interpersonal relationships. "Jet Black Heart" is not merely a song about heartbreak; it is a raw and vulnerable exploration of internal struggle, self-destructive patterns, and the precarious flicker of hope that persists even in the face of profound emotional chaos."""
Type="mp3"
Type="wav"
response = requests.post(
"http://localhost:8880/dev/captioned_speech",
json={
@ -51,12 +51,12 @@ for chunk in response.iter_lines(decode_unicode=True):
f.write(chunk_audio)
# Print word level timestamps
last3=chunk_json["timestamps"][-3]
last_chunks={"start_time":chunk_json["timestamps"][-10]["start_time"],"end_time":chunk_json["timestamps"][-3]["end_time"],"word":" ".join([X["word"] for X in chunk_json["timestamps"][-10:-3]])}
print(f"CUTTING TO {last3['word']}")
print(f"CUTTING TO {last_chunks['word']}")
audioseg=pydub.AudioSegment.from_file(f"outputstream.{Type}",format=Type)
audioseg=audioseg[last3["start_time"]*1000:last3["end_time"] * 1000]
audioseg=audioseg[last_chunks["start_time"]*1000:last_chunks["end_time"] * 1000]
audioseg.export(f"outputstreamcut.{Type}",format=Type)

View file

@ -259,10 +259,6 @@ class KokoroV1(BaseModelBackend):
)
if result.pred_dur is not None:
try:
# Join timestamps for this chunk's tokens
KPipeline.join_timestamps(
result.tokens, result.pred_dur
)
# Add timestamps with offset
for token in result.tokens:

View file

@ -254,7 +254,10 @@ async def create_captioned_speech(
base64_chunk= base64.b64encode(chunk_data.output).decode("utf-8")
# Add any chunks that may be in the acumulator into the return word_timestamps
chunk_data.word_timestamps=timestamp_acumulator + chunk_data.word_timestamps
if chunk_data.word_timestamps != None:
chunk_data.word_timestamps = timestamp_acumulator + chunk_data.word_timestamps
else:
chunk_data.word_timestamps = []
timestamp_acumulator=[]
yield CaptionedSpeechResponse(audio=base64_chunk,audio_format=content_type,timestamps=chunk_data.word_timestamps)

View file

@ -121,7 +121,7 @@ class AudioService:
is_last_chunk: bool = False,
trim_audio: bool = True,
normalizer: AudioNormalizer = None,
) -> Tuple[AudioChunk]:
) -> AudioChunk:
"""Convert audio data to specified format with streaming support
Args: