Merge pull request #310 from fireblade2534/master
Some checks failed
CI / test (3.10) (push) Has been cancelled

Added some better safety checks to captioned speech
This commit is contained in:
Fireblade2534 2025-05-09 09:13:02 -04:00 committed by GitHub
commit 6f1450c5b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 31 additions and 5 deletions

View file

@ -224,10 +224,13 @@ async def create_captioned_speech(
).decode("utf-8")
# Add any chunks that may be in the accumulator into the return word_timestamps
chunk_data.word_timestamps = (
timestamp_acumulator + chunk_data.word_timestamps
)
timestamp_acumulator = []
if chunk_data.word_timestamps is not None:
chunk_data.word_timestamps = (
timestamp_acumulator + chunk_data.word_timestamps
)
timestamp_acumulator = []
else:
chunk_data.word_timestamps = []
yield CaptionedSpeechResponse(
audio=base64_chunk,
@ -272,7 +275,7 @@ async def create_captioned_speech(
)
# Add any chunks that may be in the accumulator into the return word_timestamps
if chunk_data.word_timestamps != None:
if chunk_data.word_timestamps is not None:
chunk_data.word_timestamps = (
timestamp_acumulator + chunk_data.word_timestamps
)

23
dev/Test Phon.py Normal file
View file

@ -0,0 +1,23 @@
import base64
import json
import pydub
import requests
def generate_audio_from_phonemes(
    phonemes: str, voice: str = "af_bella", timeout: float = 300.0
):
    """Generate audio from phonemes.

    POSTs the phoneme string and voice name as JSON to the local
    ``/dev/generate_from_phonemes`` endpoint, requesting ``audio/wav``.

    Args:
        phonemes: Phoneme string sent under the ``"phonemes"`` key.
        voice: Voice name sent under the ``"voice"`` key.
        timeout: Seconds to wait for the server to connect/respond before
            giving up. Without a timeout ``requests`` waits indefinitely,
            so a stalled server would hang this script forever.

    Returns:
        The raw response body (bytes) when the server answers with HTTP 200;
        otherwise ``None``, after printing the response text.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.post(
        "http://localhost:8880/dev/generate_from_phonemes",
        json={"phonemes": phonemes, "voice": voice},
        headers={"Accept": "audio/wav"},
        timeout=timeout,
    )
    if response.status_code != 200:
        # Surface the server's error body; the caller must handle None.
        print(f"Error: {response.text}")
        return None
    return response.content
# Fetch the audio BEFORE opening the output file: the helper returns None on
# a non-200 response, and opening first would leave an empty .wav behind and
# then crash with a TypeError inside f.write(None).
audio = generate_audio_from_phonemes(r"mɪsəki ɪz ɐn ɪkspˌɛɹəmˈɛntᵊl ʤˈitəpˈi ˈɛnʤən dəzˈInd tə pˈWəɹ fjˈuʧəɹ vˈɜɹʒənz ʌv kəkˈɔɹO mˈɑdᵊlz.")
if audio is None:
    # The helper has already printed the server's error text.
    raise SystemExit("No audio returned; outputnostreammoney.wav was not written.")
with open("outputnostreammoney.wav", "wb") as f:
    f.write(audio)