mirror of
https://github.com/santinic/audiblez.git
synced 2025-08-05 16:48:55 +00:00
more features
This commit is contained in:
parent
b50f9bdc12
commit
178d100596
2 changed files with 27 additions and 18 deletions
34
audiblez.py
34
audiblez.py
|
@@ -12,8 +12,8 @@ import sys
|
||||||
import time
|
import time
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import warnings
|
|
||||||
import re
|
import re
|
||||||
|
from tabulate import tabulate
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from string import Formatter
|
from string import Formatter
|
||||||
from yaspin import yaspin
|
from yaspin import yaspin
|
||||||
|
@@ -31,19 +31,18 @@ sample_rate = 24000
|
||||||
|
|
||||||
def main(file_path, voice, pick_manually, speed, max_chapters=None):
|
def main(file_path, voice, pick_manually, speed, max_chapters=None):
|
||||||
filename = Path(file_path).name
|
filename = Path(file_path).name
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
book = epub.read_epub(file_path)
|
book = epub.read_epub(file_path)
|
||||||
meta_title = book.get_metadata('DC', 'title')
|
meta_title = book.get_metadata('DC', 'title')
|
||||||
title = meta_title[0][0] if meta_title else ''
|
title = meta_title[0][0] if meta_title else ''
|
||||||
meta_creator = book.get_metadata('DC', 'creator')
|
meta_creator = book.get_metadata('DC', 'creator')
|
||||||
by_creator = 'by ' + meta_creator[0][0] if meta_creator else ''
|
creator = meta_creator[0][0] if meta_creator else ''
|
||||||
|
|
||||||
cover_maybe = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_COVER]
|
cover_maybe = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_COVER]
|
||||||
cover_image = cover_maybe[0].get_content() if cover_maybe else b""
|
cover_image = cover_maybe[0].get_content() if cover_maybe else b""
|
||||||
if cover_maybe:
|
if cover_maybe:
|
||||||
print(f'Found cover image {cover_maybe[0].file_name} in {cover_maybe[0].media_type} format')
|
print(f'Found cover image {cover_maybe[0].file_name} in {cover_maybe[0].media_type} format')
|
||||||
|
|
||||||
intro = f'{title} {by_creator}'
|
intro = f'{title} – {creator}.\n\n'
|
||||||
print(intro)
|
print(intro)
|
||||||
|
|
||||||
document_chapters = find_document_chapters_and_extract_texts(book)
|
document_chapters = find_document_chapters_and_extract_texts(book)
|
||||||
|
@@ -102,7 +101,7 @@ def main(file_path, voice, pick_manually, speed, max_chapters=None):
|
||||||
chapter_wav_files.remove(chapter_filename)
|
chapter_wav_files.remove(chapter_filename)
|
||||||
|
|
||||||
if has_ffmpeg:
|
if has_ffmpeg:
|
||||||
create_index_file(title, by_creator, chapter_wav_files)
|
create_index_file(title, creator, chapter_wav_files)
|
||||||
create_m4b(chapter_wav_files, filename, cover_image)
|
create_m4b(chapter_wav_files, filename, cover_image)
|
||||||
|
|
||||||
|
|
||||||
|
@@ -151,25 +150,32 @@ def is_chapter(c):
|
||||||
return has_min_len and title_looks_like_chapter
|
return has_min_len and title_looks_like_chapter
|
||||||
|
|
||||||
|
|
||||||
|
def chapter_beginning_one_liner(c, chars=20):
|
||||||
|
s = c.extracted_text[:chars].strip().replace('\n', ' ').replace('\r', ' ')
|
||||||
|
return s + '…' if len(s) > 0 else ''
|
||||||
|
|
||||||
|
|
||||||
def find_good_chapters(document_chapters):
|
def find_good_chapters(document_chapters):
|
||||||
chapters = [c for c in document_chapters if c.get_type() == ebooklib.ITEM_DOCUMENT and is_chapter(c)]
|
chapters = [c for c in document_chapters if c.get_type() == ebooklib.ITEM_DOCUMENT and is_chapter(c)]
|
||||||
from tabulate import tabulate
|
|
||||||
if len(chapters) == 0:
|
if len(chapters) == 0:
|
||||||
print('Not easy to recognize the chapters, defaulting to all available documents.')
|
print('Not easy to recognize the chapters, defaulting to all non-empty documents.')
|
||||||
chapters = [c for c in document_chapters if c.get_type() == ebooklib.ITEM_DOCUMENT]
|
chapters = [c for c in document_chapters if c.get_type() == ebooklib.ITEM_DOCUMENT and len(c.extracted_text) > 10]
|
||||||
print(tabulate([
|
print(tabulate([
|
||||||
[i, c.get_name(), len(c.extracted_text), '✅' if c in chapters else '']
|
[i, c.get_name(), len(c.extracted_text), '✅' if c in chapters else '', chapter_beginning_one_liner(c)]
|
||||||
for i, c in enumerate(document_chapters, start=1)
|
for i, c in enumerate(document_chapters, start=1)
|
||||||
], headers=['#', 'Chapter', 'Text Length', 'Selected']))
|
], headers=['#', 'Chapter', 'Text Length', 'Selected', 'First words']))
|
||||||
return chapters
|
return chapters
|
||||||
|
|
||||||
|
|
||||||
def pick_chapters(chapters):
|
def pick_chapters(chapters):
|
||||||
all_chapters_names = [c.get_name() for c in chapters if c.get_type() == ebooklib.ITEM_DOCUMENT]
|
# Display the document name, the length and first 50 characters of the text
|
||||||
|
chapters_by_names = {
|
||||||
|
f'{c.get_name()}\t({len(c.extracted_text)} chars)\t[{chapter_beginning_one_liner(c, 50)}]': c
|
||||||
|
for c in chapters}
|
||||||
title = 'Select which chapters to read in the audiobook'
|
title = 'Select which chapters to read in the audiobook'
|
||||||
selected_chapters_names = pick(all_chapters_names, title, multiselect=True, min_selection_count=1)
|
ret = pick(list(chapters_by_names.keys()), title, multiselect=True, min_selection_count=1)
|
||||||
selected_chapters_names = [c[0] for c in selected_chapters_names]
|
selected_chapters_out_of_order = [chapters_by_names[r[0]] for r in ret]
|
||||||
selected_chapters = [c for c in chapters if c.get_name() in selected_chapters_names]
|
selected_chapters = [c for c in chapters if c in selected_chapters_out_of_order]
|
||||||
return selected_chapters
|
return selected_chapters
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -16,26 +16,29 @@ class MainTest(unittest.TestCase):
|
||||||
merged_args = dict(voice='af_sky', pick_manually=False, speed=1.0, max_chapters=2)
|
merged_args = dict(voice='af_sky', pick_manually=False, speed=1.0, max_chapters=2)
|
||||||
merged_args.update(kwargs)
|
merged_args.update(kwargs)
|
||||||
main(f'{name}.epub', **merged_args)
|
main(f'{name}.epub', **merged_args)
|
||||||
self.assertTrue(Path(f'{name}.m4b').exists())
|
m4b_file = Path(f'{name}.m4b')
|
||||||
|
self.assertTrue(m4b_file.exists())
|
||||||
|
self.assertTrue(m4b_file.stat().st_size > 256 * 1024)
|
||||||
chapter_1_wav = Path(f'{name}_chapter_1.wav')
|
chapter_1_wav = Path(f'{name}_chapter_1.wav')
|
||||||
self.assertTrue(chapter_1_wav.exists())
|
self.assertTrue(chapter_1_wav.exists())
|
||||||
self.assertTrue(chapter_1_wav.stat().st_size > 256 * 1024)
|
self.assertTrue(chapter_1_wav.stat().st_size > 256 * 1024)
|
||||||
|
|
||||||
def test_poe(self):
|
def test_poe(self):
|
||||||
url = 'https://www.gutenberg.org/ebooks/1064.epub.images'
|
url = 'https://www.gutenberg.org/ebooks/1064.epub.images'
|
||||||
self.base('poe')
|
self.base('poe', url)
|
||||||
|
|
||||||
def test_orwell(self):
|
def test_orwell(self):
|
||||||
url = 'https://archive.org/download/AnimalFarmByGeorgeOrwell/Animal%20Farm%20by%20George%20Orwell.epub'
|
url = 'https://archive.org/download/AnimalFarmByGeorgeOrwell/Animal%20Farm%20by%20George%20Orwell.epub'
|
||||||
self.base('orwell', url)
|
self.base('orwell', url)
|
||||||
|
|
||||||
def test_italian_pirandello(self):
|
def test_italian_pirandello(self):
|
||||||
self.base('pirandello', voice='im_nicola')
|
url = 'https://www.liberliber.eu/mediateca/libri/p/pirandello/cosi_e_se_vi_pare_1925/epub/pirandello_cosi_e_se_vi_pare_1925.epub'
|
||||||
|
self.base('pirandello', url, voice='im_nicola')
|
||||||
self.assertTrue(Path('pirandello.m4b').exists())
|
self.assertTrue(Path('pirandello.m4b').exists())
|
||||||
|
|
||||||
def test_italian_manzoni(self):
|
def test_italian_manzoni(self):
|
||||||
url = 'https://www.liberliber.eu/mediateca/libri/m/manzoni/i_promessi_sposi/epub/manzoni_i_promessi_sposi.epub'
|
url = 'https://www.liberliber.eu/mediateca/libri/m/manzoni/i_promessi_sposi/epub/manzoni_i_promessi_sposi.epub'
|
||||||
self.base('manzoni', url, voice='im_nicola')
|
self.base('manzoni', url, voice='im_nicola', max_chapters=1)
|
||||||
|
|
||||||
def test_french_baudelaire(self):
|
def test_french_baudelaire(self):
|
||||||
url = 'http://gallica.bnf.fr/ark:/12148/bpt6k70861t.epub'
|
url = 'http://gallica.bnf.fr/ark:/12148/bpt6k70861t.epub'
|
||||||
|
|
Loading…
Add table
Reference in a new issue