Merge pull request #310 from fireblade2534/master

Added some better safety checks to captioned speech
2025-08-05 16:48:53 +00:00 · 2025-05-09 09:13:02 -04:00 · 2025-05-09 09:13:02 -04:00 · 6f1450c5b4
commit 6f1450c5b4
parent 36197b9266 243d98e339
2 changed files with 31 additions and 5 deletions
--- a/api/src/routers/development.py
+++ b/api/src/routers/development.py
@ -224,10 +224,13 @@ async def create_captioned_speech(
                                ).decode("utf-8")
                                # Add any chunks that may be in the acumulator into the return word_timestamps
-                                chunk_data.word_timestamps = (
+                                if chunk_data.word_timestamps is not None:
-                                    timestamp_acumulator + chunk_data.word_timestamps
+                                    chunk_data.word_timestamps = (
-                                )
+                                        timestamp_acumulator + chunk_data.word_timestamps
-                                timestamp_acumulator = []
+                                    )
                                    timestamp_acumulator = []
                                else:
                                    chunk_data.word_timestamps = []
                                yield CaptionedSpeechResponse(
                                    audio=base64_chunk,
@ -272,7 +275,7 @@ async def create_captioned_speech(
                            )
                            # Add any chunks that may be in the acumulator into the return word_timestamps
-                            if chunk_data.word_timestamps != None:
+                            if chunk_data.word_timestamps is not None:
                                chunk_data.word_timestamps = (
                                    timestamp_acumulator + chunk_data.word_timestamps
                                )
--- a/dev/Test
+++ b/dev/Test
@ -0,0 +1,23 @@
 import base64
 import json
 import pydub
 import requests
 def generate_audio_from_phonemes(phonemes: str, voice: str = "af_bella", *, timeout: float = 60.0):
    """Request WAV audio for a phoneme string from the local dev endpoint.

    Args:
        phonemes: Phoneme string sent as the ``phonemes`` field of the JSON body.
        voice: Voice identifier sent as the ``voice`` field of the JSON body.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error. Without a timeout a stalled server would block
            this script indefinitely.

    Returns:
        The raw response body (bytes) when the server answers HTTP 200;
        otherwise ``None``, after printing the server's error text.
    """
    response = requests.post(
        "http://localhost:8880/dev/generate_from_phonemes",
        json={"phonemes": phonemes, "voice": voice},
        headers={"Accept": "audio/wav"},
        timeout=timeout,
    )
    if response.status_code != 200:
        # Non-200 is reported but not raised; callers must check for None.
        print(f"Error: {response.text}")
        return None
    return response.content
 # Generate the audio before opening the output file: the helper returns None
 # on a non-200 response, and opening first would leave an empty .wav behind
 # and then fail with a TypeError inside f.write(None).
 audio = generate_audio_from_phonemes(r"mɪsəki ɪz ɐn ɪkspˌɛɹəmˈɛntᵊl ʤˈitəpˈi ˈɛnʤən dəzˈInd tə pˈWəɹ fjˈuʧəɹ vˈɜɹʒənz ʌv kəkˈɔɹO mˈɑdᵊlz.")
 if audio is None:
    # The helper has already printed the server error; exit non-zero so the
    # failure is still visible to whoever ran the script.
    raise SystemExit(1)
 with open("outputnostreammoney.wav", "wb") as f:
    f.write(audio)