mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-04-13 09:39:17 +00:00
53 lines
1.5 KiB
Python
53 lines
1.5 KiB
Python
![]() |
|
||
|
#!/usr/bin/env rye run python
|
||
|
|
||
|
import time
|
||
|
from pathlib import Path
|
||
|
|
||
|
from openai import OpenAI
|
||
|
|
||
|
# Client aimed at a locally hosted OpenAI-compatible endpoint. The API key is a
# literal placeholder passed explicitly; it is not read from OPENAI_API_KEY.
openai = OpenAI(
    base_url="http://localhost:8880/v1",
    api_key="not-needed-for-local",
)

# Destination of the generated audio file: same directory as this script.
speech_file_path = Path(__file__).parent.joinpath("speech.mp3")
|
||
|
|
||
|
|
||
|
def main() -> None:
    """Play a streamed sample on the speakers, then save a second sample to disk.

    The saved audio is written to the module-level ``speech_file_path``.
    """
    stream_to_speakers()

    # Create text-to-speech audio file: build the streaming request, then enter
    # it and let the response write itself to the target path.
    speech_request = openai.audio.speech.with_streaming_response.create(
        model="kokoro",
        voice="af",
        input="the quick brown fox jumped over the lazy dogs",
    )
    with speech_request as response:
        response.stream_to_file(speech_file_path)
|
||
|
|
||
|
|
||
|
|
||
|
def stream_to_speakers() -> None:
    """Stream synthesized speech from the API to an audio output stream.

    Requests audio in ``pcm`` format for a short lyric, writes every received
    chunk to a PyAudio output stream as it arrives, and prints two timings
    (both in milliseconds, measured from just before the request is issued):
    one when the streaming response is opened and one after all chunks have
    been written.

    The PyAudio output stream and the ``PyAudio`` instance that owns it are
    always released, even when the request or playback raises.
    """
    # Imported inside the function, so PyAudio is only needed when this runs.
    import pyaudio

    # Keep a handle on the PyAudio instance so it can be terminated afterwards.
    audio = pyaudio.PyAudio()
    try:
        # Output parameters handed to PyAudio: 16-bit samples, 1 channel, 24 kHz.
        # NOTE(review): presumably matches the server's "pcm" output -- confirm.
        player_stream = audio.open(format=pyaudio.paInt16, channels=1, rate=24000, output=True)
        try:
            start_time = time.time()

            with openai.audio.speech.with_streaming_response.create(
                model="kokoro",
                voice="af",
                response_format="pcm",  # similar to WAV, but without a header chunk at the start.
                input=(
                    "I see skies of blue and clouds of white\n"
                    "The bright blessed days, the dark sacred nights\n"
                    "And I think to myself\n"
                    "What a wonderful world"
                ),
            ) as response:
                # Printed as soon as the response context is entered, before any
                # chunk of the body has been read.
                print(f"Time to first byte: {int((time.time() - start_time) * 1000)}ms")
                for chunk in response.iter_bytes(chunk_size=1024):
                    player_stream.write(chunk)

            print(f"Done in {int((time.time() - start_time) * 1000)}ms.")
        finally:
            # Stop before closing; NOTE(review): stopping is expected to let
            # already-queued audio finish playing -- confirm against PortAudio docs.
            player_stream.stop_stream()
            player_stream.close()
    finally:
        # Release the audio backend resources held by this PyAudio instance.
        audio.terminate()
|
||
|
|
||
|
|
||
|
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|