voices, chunking etc

2025-08-05 16:48:55 +00:00 · 2025-01-31 12:13:55 +01:00 · 2025-01-31 12:13:55 +01:00 · a0ddd73307
commit a0ddd73307
parent ec2e678c51
2 changed files with 47 additions and 36 deletions
--- a/audiblez.py
+++ b/audiblez.py
@ -7,7 +7,6 @@ import sys
 import time
 import shutil
 import subprocess
-
 import numpy as np
 import soundfile
 import ebooklib
@ -23,14 +22,9 @@ from pydub import AudioSegment
 from pick import pick
 from tempfile import NamedTemporaryFile

+from voices import voices, available_voices_str
+
 sample_rate = 24000
-voices = [
-    'af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore', 'af_nicole',
-    'af_nova', 'af_river', 'af_sarah', 'af_sky', 'am_adam', 'am_echo', 'am_eric',
-    'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'bf_alice',
-    'bf_emma', 'bf_isabella', 'bf_lily', 'bm_daniel', 'bm_fable',
-    'bm_george', 'bm_lewis'
-]


 def main(pipeline, file_path, voice, pick_manually, speed):
@ -83,24 +77,9 @@ def main(pipeline, file_path, voice, pick_manually, speed):
        if i == 1:
            text = intro + '.\n\n' + text
        start_time = time.time()
-        audio_segments = []
-        chunk_size = 5000  # Adjust chunk size as needed

-        # Fixed the text processing loop
-        remaining_text = text
-        while remaining_text:
-            chunk = remaining_text[:chunk_size]
-            remaining_text = remaining_text[chunk_size:]
-            
-            # Process the chunk
-            chunk_segments = []
-            for gs, ps, audio in pipeline(chunk, voice=voice, speed=speed):
-                chunk_segments.append(audio)
-            
-            if chunk_segments:  # Only append if we got valid audio segments
-                audio_segments.extend(chunk_segments)
-
-        if audio_segments:  # Only concatenate if we have segments
+        audio_segments = gen_audio_segments(pipeline, text, voice, speed)
+        if audio_segments:
            final_audio = np.concatenate(audio_segments)
            soundfile.write(chapter_filename, final_audio, sample_rate)
            end_time = time.time()
@ -120,6 +99,34 @@ def main(pipeline, file_path, voice, pick_manually, speed):
        create_index_file(title, by_creator, chapter_mp3_files)
        create_m4b(chapter_mp3_files, filename, cover_image)

+
+def gen_audio_segments(pipeline, text, voice, speed):  # Run `pipeline` once over the whole `text` and collect its audio output.
+    audio_segments = []
+    for gs, ps, audio in pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+'):  # splitting is left to the pipeline (presumably on newline runs -- confirm)
+        audio_segments.append(audio)  # keep only the audio element of each yielded (gs, ps, audio) triple
+    return audio_segments  # list in yield order; empty when the pipeline yields nothing
+
+
+def gen_audio_segments_raw(pipeline, text, voice, speed):  # Variant that feeds `pipeline` fixed-size character slices of `text`.
+    audio_segments = []
+    # NOTE(review): not called from any line visible in this diff; looks like the former inline loop kept for reference -- confirm.
+    chunk_size = 5000  # Adjust chunk size as needed
+    # Slices are taken purely by character position, so a boundary can land mid-word or mid-sentence.
+    remaining_text = text
+    while remaining_text:
+        chunk = remaining_text[:chunk_size]
+        remaining_text = remaining_text[chunk_size:]
+        # Unlike gen_audio_segments, no split_pattern is passed to the pipeline here.
+        # Process the chunk
+        chunk_segments = []
+        for gs, ps, audio in pipeline(chunk, voice=voice, speed=speed):
+            chunk_segments.append(audio)
+        # extend() with an empty list is a no-op, so the guard below does not change the result.
+        if chunk_segments:  # Only append if we got valid audio segments
+            audio_segments.extend(chunk_segments)
+    return audio_segments
+
+
 def extract_texts(chapters):
    texts = []
    for chapter in chapters:
@ -239,10 +246,11 @@ def create_index_file(title, creator, chapter_mp3_files):

 def cli_main():
    voices_str = ', '.join(voices)
-    epilog = 'example:\n' + \
-             '  audiblez book.epub -l en-us -v af_sky'
-    default_voice = 'af_sky' if 'af_sky' in voices else voices[0]
-
+    epilog = ('example:\n' +
+              '  audiblez book.epub -l en-us -v af_sky\n\n' +
+              'available voices:\n' +
+              available_voices_str)
+    default_voice = 'af_sky'
    parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('epub_file_path', help='Path to the epub file')
    parser.add_argument('-v', '--voice', default=default_voice, help=f'Choose narrating voice: {voices_str}')
--- a/test/test_main.py
+++ b/test/test_main.py
@ -9,20 +9,23 @@ from audiblez import main

 class MainTest(unittest.TestCase):
    def base(self, file_path, **kwargs):
+        os.system('rm mini_chapter_*.wav')
        pipeline = KPipeline(lang_code='a')
        main(pipeline, file_path=file_path, voice='af_sky', pick_manually=False, speed=1, **kwargs)

-    def test_1_mini(self):
-        Path('mini.m4b').unlink(missing_ok=True)
-        self.base(file_path='../epub/mini.epub')
-        self.assertTrue(Path('mini.m4b').exists())
-
-    def test_2_allan_poe(self):
+    def test_1_allan_poe(self):
        Path('poe.m4b').unlink(missing_ok=True)
+        os.system('rm poe_chapter_*.wav')
        self.base(file_path='../epub/poe.epub')
        self.assertTrue(Path('poe.m4b').exists())

-    def test_orwell(self):
+    def test_2_mini(self):
+        Path('mini.m4b').unlink(missing_ok=True)
+        os.system('rm mini_chapter_*.wav')
+        self.base(file_path='../epub/mini.epub')
+        self.assertTrue(Path('mini.m4b').exists())
+
+    def test_3_orwell(self):
        Path('orwell.m4b').unlink(missing_ok=True)
        os.system('rm orwell_chapter_*.wav')
        self.base(file_path='../epub/orwell.epub')