mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-08-05 16:48:53 +00:00
Merge pull request #310 from fireblade2534/master
Some checks failed
CI / test (3.10) (push) Has been cancelled
Some checks failed
CI / test (3.10) (push) Has been cancelled
Added some better safety checks to captioned speech
This commit is contained in:
commit
6f1450c5b4
2 changed files with 31 additions and 5 deletions
|
@ -224,10 +224,13 @@ async def create_captioned_speech(
|
||||||
).decode("utf-8")
|
).decode("utf-8")
|
||||||
|
|
||||||
# Add any chunks that may be in the acumulator into the return word_timestamps
|
# Add any chunks that may be in the acumulator into the return word_timestamps
|
||||||
chunk_data.word_timestamps = (
|
if chunk_data.word_timestamps is not None:
|
||||||
timestamp_acumulator + chunk_data.word_timestamps
|
chunk_data.word_timestamps = (
|
||||||
)
|
timestamp_acumulator + chunk_data.word_timestamps
|
||||||
timestamp_acumulator = []
|
)
|
||||||
|
timestamp_acumulator = []
|
||||||
|
else:
|
||||||
|
chunk_data.word_timestamps = []
|
||||||
|
|
||||||
yield CaptionedSpeechResponse(
|
yield CaptionedSpeechResponse(
|
||||||
audio=base64_chunk,
|
audio=base64_chunk,
|
||||||
|
@ -272,7 +275,7 @@ async def create_captioned_speech(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add any chunks that may be in the acumulator into the return word_timestamps
|
# Add any chunks that may be in the acumulator into the return word_timestamps
|
||||||
if chunk_data.word_timestamps != None:
|
if chunk_data.word_timestamps is not None:
|
||||||
chunk_data.word_timestamps = (
|
chunk_data.word_timestamps = (
|
||||||
timestamp_acumulator + chunk_data.word_timestamps
|
timestamp_acumulator + chunk_data.word_timestamps
|
||||||
)
|
)
|
||||||
|
|
23
dev/Test Phon.py
Normal file
23
dev/Test Phon.py
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pydub
|
||||||
|
import requests
|
||||||
|
|
||||||
|
def generate_audio_from_phonemes(
    phonemes: str, voice: str = "af_bella", *, timeout: float = 60.0
):
    """Generate audio from phonemes.

    POSTs ``phonemes`` and ``voice`` as a JSON body to the local
    ``/dev/generate_from_phonemes`` endpoint with an ``Accept: audio/wav``
    header.

    Args:
        phonemes: Phoneme string sent in the request body.
        voice: Voice name sent in the request body.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error. Without a timeout an unresponsive server would
            block this call indefinitely.

    Returns:
        The raw response body (bytes) when the server answers with HTTP
        200; otherwise ``None``, after printing the response text.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged when the
            request cannot be completed (connection failure, timeout).
    """
    response = requests.post(
        "http://localhost:8880/dev/generate_from_phonemes",
        json={"phonemes": phonemes, "voice": voice},
        headers={"Accept": "audio/wav"},
        timeout=timeout,
    )
    if response.status_code != 200:
        print(f"Error: {response.text}")
        return None
    return response.content
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Request the audio before touching the output file: the helper returns None
# on a non-200 response, and opening the file first would leave an empty
# .wav behind and then crash on f.write(None).
audio = generate_audio_from_phonemes(
    r"mɪsəki ɪz ɐn ɪkspˌɛɹəmˈɛntᵊl ʤˈitəpˈi ˈɛnʤən dəzˈInd tə pˈWəɹ fjˈuʧəɹ vˈɜɹʒənz ʌv kəkˈɔɹO mˈɑdᵊlz."
)
if audio is not None:
    with open("outputnostreammoney.wav", "wb") as f:
        f.write(audio)
|
Loading…
Add table
Reference in a new issue