2025-05-01 17:53:01 +00:00
|
|
|
import base64
|
|
|
|
import json
|
|
|
|
|
|
|
|
import pydub
|
|
|
|
import requests
|
|
|
|
|
|
|
|
# Text sent to the server for synthesis.
text = """Running on localhost:7860"""

# Audio format requested from the server; also used as the output file extension.
Type = "wav"

# Request a streamed response from the captioned-speech endpoint. Each non-empty
# line of the body is parsed below as a JSON object with a base64 "audio" entry
# and a "timestamps" entry.
response = requests.post(
    "http://localhost:8880/dev/captioned_speech",
    json={
        "model": "kokoro",
        "input": text,
        "voice": "af_heart+af_sky",
        "speed": 1.0,
        "response_format": Type,
        "stream": True,
    },
    stream=True,
)

# Most recent chunk whose "timestamps" list was non-empty. Stays None when no
# chunk carried timestamps, so the final print cannot hit an unbound name.
chunk_json = None

# Both context managers guarantee cleanup: the output file is flushed and closed,
# and the streamed HTTP connection is released, even if parsing fails midway.
with response, open(f"outputstream.{Type}", "wb") as f:
    # Fail on an HTTP error status instead of trying to decode an error body
    # as audio.
    response.raise_for_status()

    for chunk in response.iter_lines(decode_unicode=True):
        # iter_lines can yield empty keep-alive lines; skip them.
        if not chunk:
            continue

        temp_json = json.loads(chunk)
        if temp_json["timestamps"]:
            chunk_json = temp_json

        # Decode base 64 stream to bytes
        chunk_audio = base64.b64decode(temp_json["audio"].encode("utf-8"))

        # Process streaming chunks
        f.write(chunk_audio)

# Print word level timestamps
if chunk_json is not None:
    print(chunk_json["timestamps"])
else:
    print("No timestamps were received from the server.")