This commit is contained in:
Claudio Santini 2025-01-30 09:24:21 +01:00
parent 12fbf89fcc
commit bf09d6ad82
3 changed files with 23 additions and 20 deletions

View file

@ -57,14 +57,14 @@ You can try them here: [https://huggingface.co/spaces/hexgrad/Kokoro-TTS](https:
## How to run on GPU
By default audiblez runs on CPU. If it finds CUDA available as Torch device it tries to use it.
Experimental support for CUDA is available.
We don't currently support Apple Silicon, as there is not yet a Kokoro implementation in MLX. As soon as it becomes available, we will support it.
## How to run on Google Colab
By default audiblez runs on CPU. If you pass the option `--cuda` it will try to use the Cuda device via Torch.
Check out this example: [Audiblez running on a Google Colab Notebook with Cuda](https://colab.research.google.com/drive/164PQLowogprWQpRjKk33e-8IORAvqXKI?usp=sharing).
We don't currently support Apple Silicon, as there is not yet a Kokoro implementation in MLX. As soon as it becomes available, we will support it.
## Author
by [Claudio Santini](https://claudio.uk) in 2025, distributed under MIT licence.

View file

@ -67,8 +67,6 @@ def main(pipeline, file_path, voice, pick_manually, speed):
print(f'Estimated time remaining (assuming {chars_per_sec} chars/sec): {strfdelta((total_chars - processed_chars) / chars_per_sec)}')
chapter_mp3_files = []
durations = {}
for i, text in enumerate(texts, start=1):
chapter_filename = filename.replace('.epub', f'_chapter_{i}.wav')
chapter_mp3_files.append(chapter_filename)
@ -97,8 +95,8 @@ def main(pipeline, file_path, voice, pick_manually, speed):
print('Progress:', f'{progress}%\n')
if has_ffmpeg:
create_index_file(title, creator, chapter_mp3_files, durations)
create_m4b(chapter_mp3_files, filename, title, creator, cover_image)
create_index_file(title, creator, chapter_mp3_files)
create_m4b(chapter_mp3_files, filename, cover_image)
def extract_texts(chapters):
@ -132,7 +130,6 @@ def find_chapters(book, verbose=False):
for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT:
print(f"'{item.get_name()}'" + ', #' + str(len(item.get_body_content())))
# print(f'{item.get_name()}'.ljust(60), str(len(item.get_body_content())).ljust(15), 'X' if item in chapters else '-')
if len(chapters) == 0:
print('Not easy to find the chapters, defaulting to all available documents.')
chapters = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT]
@ -161,7 +158,7 @@ def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
return f.format(fmt, **values)
def create_m4b(chapter_files, filename, title, author, cover_image):
def create_m4b(chapter_files, filename, cover_image):
tmp_filename = filename.replace('.epub', '.tmp.mp4')
if not Path(tmp_filename).exists():
combined_audio = AudioSegment.empty()
@ -206,15 +203,14 @@ def probe_duration(file_name):
return float(proc.stdout.strip())
def create_index_file(title, creator, chapter_mp3_files):
    """Write an ffmpeg metadata chapter index to 'chapters.txt'.

    Probes each chapter file's duration with ffprobe and writes cumulative
    START/END timestamps in milliseconds (TIMEBASE=1/1000) in FFMETADATA1
    format, suitable for muxing chapter marks into the final m4b.

    Args:
        title: audiobook title, written to the metadata header.
        creator: author name, written as the 'artist' tag.
        chapter_mp3_files: ordered list of per-chapter audio file paths.
    """
    with open("chapters.txt", "w") as f:
        f.write(f";FFMETADATA1\ntitle={title}\nartist={creator}\n\n")
        start = 0
        # Number chapters from 1 to stay consistent with the
        # '*_chapter_<i>.wav' file naming (enumerate(texts, start=1) in main).
        for i, c in enumerate(chapter_mp3_files, start=1):
            duration = probe_duration(c)
            end = start + int(duration * 1000)
            f.write(f"[CHAPTER]\nTIMEBASE=1/1000\nSTART={start}\nEND={end}\ntitle=Chapter {i}\n\n")
            start = end
@ -231,12 +227,14 @@ def cli_main():
parser.add_argument('-v', '--voice', default=default_voice, help=f'Choose narrating voice: {voices_str}')
parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook', action='store_true')
parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float)
parser.add_argument('-c', '--cuda', default=False, help=f'Use GPU via Cuda in Torch if available', type=float)
if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
args = parser.parse_args()
if args.cuda:
if torch.cuda.is_available():
print('CUDA GPU available')
torch.set_default_device('cuda')

View file

@ -8,7 +8,7 @@ from audiblez import main
class MainTest(unittest.TestCase):
def base(self, file_path, **kwargs):
    """Run the full epub-to-audiobook conversion for *file_path*.

    Builds a Kokoro pipeline ('a' = American English; 'b' would be British)
    and invokes audiblez.main with the default 'af_sky' voice at normal speed.
    Extra keyword arguments are forwarded to main().
    """
    pipeline = KPipeline(lang_code='a')
    main(pipeline, file_path=file_path, voice='af_sky', pick_manually=False, speed=1, **kwargs)
# def test_0_txt(self):
@ -30,3 +30,8 @@ class MainTest(unittest.TestCase):
# Path('gene.m4b').unlink(missing_ok=True)
# self.base(file_path='../epub/gene.epub')
# self.assertTrue(Path('gene.m4b').exists())
def test_orwell(self):
    """End-to-end check: converting orwell.epub must produce orwell.m4b."""
    output = Path('orwell.m4b')
    output.unlink(missing_ok=True)  # start from a clean slate
    self.base(file_path='../epub/orwell.epub')
    self.assertTrue(output.exists())