more features

2025-08-31 22:09:26 +00:00 · 2025-01-31 21:45:37 +01:00 · 2025-01-31 21:45:37 +01:00 · 178d100596
commit 178d100596
parent b50f9bdc12
2 changed files with 27 additions and 18 deletions
--- a/audiblez.py
+++ b/audiblez.py
@@ -12,8 +12,8 @@ import sys
 import time
 import shutil
 import subprocess
-import warnings
 import re
+from tabulate import tabulate
 from pathlib import Path
 from string import Formatter
 from yaspin import yaspin
@@ -31,19 +31,18 @@ sample_rate = 24000

 def main(file_path, voice, pick_manually, speed, max_chapters=None):
    filename = Path(file_path).name
-    warnings.simplefilter("ignore")
    book = epub.read_epub(file_path)
    meta_title = book.get_metadata('DC', 'title')
    title = meta_title[0][0] if meta_title else ''
    meta_creator = book.get_metadata('DC', 'creator')
-    by_creator = 'by ' + meta_creator[0][0] if meta_creator else ''
+    creator = meta_creator[0][0] if meta_creator else ''

    cover_maybe = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_COVER]
    cover_image = cover_maybe[0].get_content() if cover_maybe else b""
    if cover_maybe:
        print(f'Found cover image {cover_maybe[0].file_name} in {cover_maybe[0].media_type} format')

-    intro = f'{title} {by_creator}'
+    intro = f'{title} – {creator}.\n\n'
    print(intro)

    document_chapters = find_document_chapters_and_extract_texts(book)
@@ -102,7 +101,7 @@ def main(file_path, voice, pick_manually, speed, max_chapters=None):
                chapter_wav_files.remove(chapter_filename)

    if has_ffmpeg:
-        create_index_file(title, by_creator, chapter_wav_files)
+        create_index_file(title, creator, chapter_wav_files)
        create_m4b(chapter_wav_files, filename, cover_image)


@@ -151,25 +150,32 @@ def is_chapter(c):
    return has_min_len and title_looks_like_chapter


+def chapter_beginning_one_liner(c, chars=20):
+    s = c.extracted_text[:chars].strip().replace('\n', ' ').replace('\r', ' ')
+    return s + '…' if len(s) > 0 else ''
+
+
 def find_good_chapters(document_chapters):
    chapters = [c for c in document_chapters if c.get_type() == ebooklib.ITEM_DOCUMENT and is_chapter(c)]
-    from tabulate import tabulate
    if len(chapters) == 0:
-        print('Not easy to recognize the chapters, defaulting to all available documents.')
-        chapters = [c for c in document_chapters if c.get_type() == ebooklib.ITEM_DOCUMENT]
+        print('Not easy to recognize the chapters, defaulting to all non-empty documents.')
+        chapters = [c for c in document_chapters if c.get_type() == ebooklib.ITEM_DOCUMENT and len(c.extracted_text) > 10]
    print(tabulate([
-        [i, c.get_name(), len(c.extracted_text), '✅' if c in chapters else '']
+        [i, c.get_name(), len(c.extracted_text), '✅' if c in chapters else '', chapter_beginning_one_liner(c)]
        for i, c in enumerate(document_chapters, start=1)
-    ], headers=['#', 'Chapter', 'Text Length', 'Selected']))
+    ], headers=['#', 'Chapter', 'Text Length', 'Selected', 'First words']))
    return chapters


 def pick_chapters(chapters):
-    all_chapters_names = [c.get_name() for c in chapters if c.get_type() == ebooklib.ITEM_DOCUMENT]
+    # Display the document name, the length and first 50 characters of the text
+    chapters_by_names = {
+        f'{c.get_name()}\t({len(c.extracted_text)} chars)\t[{chapter_beginning_one_liner(c, 50)}]': c
+        for c in chapters}
    title = 'Select which chapters to read in the audiobook'
-    selected_chapters_names = pick(all_chapters_names, title, multiselect=True, min_selection_count=1)
-    selected_chapters_names = [c[0] for c in selected_chapters_names]
-    selected_chapters = [c for c in chapters if c.get_name() in selected_chapters_names]
+    ret = pick(list(chapters_by_names.keys()), title, multiselect=True, min_selection_count=1)
+    selected_chapters_out_of_order = [chapters_by_names[r[0]] for r in ret]
+    selected_chapters = [c for c in chapters if c in selected_chapters_out_of_order]
    return selected_chapters


--- a/test/test_main.py
+++ b/test/test_main.py
@@ -16,26 +16,29 @@ class MainTest(unittest.TestCase):
        merged_args = dict(voice='af_sky', pick_manually=False, speed=1.0, max_chapters=2)
        merged_args.update(kwargs)
        main(f'{name}.epub', **merged_args)
-        self.assertTrue(Path(f'{name}.m4b').exists())
+        m4b_file = Path(f'{name}.m4b')
+        self.assertTrue(m4b_file.exists())
+        self.assertTrue(m4b_file.stat().st_size > 256 * 1024)
        chapter_1_wav = Path(f'{name}_chapter_1.wav')
        self.assertTrue(chapter_1_wav.exists())
        self.assertTrue(chapter_1_wav.stat().st_size > 256 * 1024)

    def test_poe(self):
        url = 'https://www.gutenberg.org/ebooks/1064.epub.images'
-        self.base('poe')
+        self.base('poe', url)

    def test_orwell(self):
        url = 'https://archive.org/download/AnimalFarmByGeorgeOrwell/Animal%20Farm%20by%20George%20Orwell.epub'
        self.base('orwell', url)

    def test_italian_pirandello(self):
-        self.base('pirandello', voice='im_nicola')
+        url = 'https://www.liberliber.eu/mediateca/libri/p/pirandello/cosi_e_se_vi_pare_1925/epub/pirandello_cosi_e_se_vi_pare_1925.epub'
+        self.base('pirandello', url, voice='im_nicola')
        self.assertTrue(Path('pirandello.m4b').exists())

    def test_italian_manzoni(self):
        url = 'https://www.liberliber.eu/mediateca/libri/m/manzoni/i_promessi_sposi/epub/manzoni_i_promessi_sposi.epub'
-        self.base('manzoni', url, voice='im_nicola')
+        self.base('manzoni', url, voice='im_nicola', max_chapters=1)

    def test_french_baudelaire(self):
        url = 'http://gallica.bnf.fr/ark:/12148/bpt6k70861t.epub'