# audiblez/audiblez.py

# audiblez - A program to convert e-books into audiobooks using high-quality
# Kokoro-82M text-to-speech model.
# Distributed under the MIT License for educational purposes.
# by Claudio Santini (2025) - https://claudio.uk

import argparse
import time
import shutil
import subprocess
import soundfile as sf
import ebooklib
import warnings
from pathlib import Path
from string import Formatter
from bs4 import BeautifulSoup
from kokoro_onnx import Kokoro
from ebooklib import epub

# Shared Kokoro text-to-speech engine, constructed once when this module is
# imported and used by main() to synthesise every chapter.
# NOTE(review): the model and voices files are given as bare relative file
# names, so they are presumably looked up in the current working directory —
# confirm against kokoro_onnx.
kokoro = Kokoro('kokoro-v0_19.onnx', 'voices.json')


def main(file_path, lang, voice, wav=None):
    """Convert an epub into one audio file per detected chapter.

    The book is read with ebooklib, chapters are selected by
    ``find_chapters`` and reduced to plain text by ``extract_texts``; each
    text is then synthesised with the module-level ``kokoro`` engine and
    written to ``<lower-cased book name>_chapter_<n>.wav`` (a relative file
    name with no directory component). Chapters whose .wav or .mp3 file
    already exists are skipped, so an interrupted run can be resumed.

    Args:
        file_path: Path to the epub file.
        lang: Language code forwarded to ``kokoro.create``.
        voice: Voice name forwarded to ``kokoro.create``.
        wav: When true, keep the .wav files and never call
            ``convert_to_mp3``. When None (the default) the flag is read
            from the module-level ``args`` namespace created by the
            command-line entry point, falling back to False if that
            namespace does not exist (e.g. when imported as a library).
    """
    if wav is None:
        # Historic behaviour: the CLI stores its parsed options in a global
        # named ``args``. Look it up defensively so that calling main() from
        # other code no longer raises NameError.
        wav = getattr(globals().get('args'), 'wav', False)

    file_name = Path(file_path).name
    with warnings.catch_warnings():
        # catch_warnings() alone only saves/restores the filters; an explicit
        # filter is required to actually silence the warnings read_epub emits.
        warnings.simplefilter('ignore')
        book = epub.read_epub(file_path)
    title = book.get_metadata('DC', 'title')[0][0]
    creator = book.get_metadata('DC', 'creator')[0][0]
    intro = f'{title} by {creator}'
    print(intro)
    chapters = find_chapters(book)
    print([c.get_name() for c in chapters])
    texts = extract_texts(chapters)
    has_ffmpeg = shutil.which('ffmpeg') is not None
    use_ffmpeg = has_ffmpeg and not wav

    # Strip only the final extension. The previous str.replace('.epub', ...)
    # returned the *input* file name unchanged when it did not contain
    # '.epub', which made every chapter target the book file itself.
    base_name = Path(file_name.lower()).stem

    for i, text in enumerate(texts, start=1):
        chapter_filename = f'{base_name}_chapter_{i}.wav'
        mp3_filename = Path(chapter_filename).with_suffix('.mp3')
        if Path(chapter_filename).exists() or mp3_filename.exists():
            print(f'File for chapter {i} already exists. Skipping')
            continue
        print(f'Reading chapter {i} ({len(text):,} characters)...')
        start_time = time.perf_counter()
        samples, sample_rate = kokoro.create(text, voice=voice, speed=1.0, lang=lang)
        sf.write(chapter_filename, samples, sample_rate)
        delta_seconds = time.perf_counter() - start_time
        # Guard against a zero elapsed time so the rate is always defined.
        characters_per_second = len(text) / delta_seconds if delta_seconds > 0 else 0.0
        print('Chapter written to', chapter_filename)
        print(f'Chapter {i} read in {delta_seconds:.2f} seconds '
              f'({characters_per_second:.0f} characters per second)')
        # ``i`` is 1-based, so texts[i:] holds exactly the chapters that come
        # after the one just written (texts[i - 1:] would count it again).
        remaining_characters = sum(len(t) for t in texts[i:])
        if characters_per_second > 0:
            remaining_time = remaining_characters / characters_per_second
            print(f'Estimated time remaining: {strfdelta(remaining_time)}')
        print()
        if use_ffmpeg:
            print(f'In parallel, converting chapter {i} to mp3...')
            convert_to_mp3(chapter_filename)


def extract_texts(chapters):
    """Return the readable text of each chapter, one string per chapter.

    The markup returned by each chapter's ``get_body_content()`` is parsed
    with BeautifulSoup (lxml parser). The stripped text of every title,
    paragraph and h1-h4 element is kept, in the order ``find_all`` yields
    them, each followed by a newline; elements without text are dropped.
    A chapter with no matching elements contributes an empty string.
    """
    wanted_tags = ['title', 'p', 'h1', 'h2', 'h3', 'h4']
    collected = []
    for item in chapters:
        document = BeautifulSoup(item.get_body_content(), features='lxml')
        lines = []
        for element in document.find_all(wanted_tags):
            raw = element.text
            stripped = raw.strip() if raw else ""
            if not stripped:
                continue
            lines.append(stripped + '\n')
        collected.append(''.join(lines))
    return collected


def find_chapters(book, verbose=False):
    """Select the document items of ``book`` that look like chapters.

    An item is selected when its type is ``ebooklib.ITEM_DOCUMENT`` and its
    lower-cased name contains 'chapter' or 'part'. With ``verbose`` set, a
    ``(name, body length, 'YES' or '-')`` tuple is printed for every
    document item, showing whether it was selected.

    Returns the selected items as a list, in the order ``get_items()``
    yields them.
    """
    def looks_like_chapter(item):
        name = item.get_name().lower()
        return 'chapter' in name or 'part' in name

    selected = []
    for item in book.get_items():
        if item.get_type() == ebooklib.ITEM_DOCUMENT and looks_like_chapter(item):
            selected.append(item)

    if not verbose:
        return selected

    for item in book.get_items():
        if item.get_type() != ebooklib.ITEM_DOCUMENT:
            continue
        print((item.get_name(), len(item.get_body_content()), 'YES' if item in selected else '-'))
    return selected


def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
    """Format a duration given in seconds using a ``str.format`` template.

    ``tdelta`` is truncated to an integer number of seconds. The template
    may use the fields W (weeks), D (days), H (hours), M (minutes) and
    S (seconds). Only the fields that appear in ``fmt`` are computed, from
    the largest unit to the smallest, so a unit missing from the template
    rolls over into the next smaller one that is present.
    """
    seconds_left = int(tdelta)
    formatter = Formatter()
    # Field names referenced by the template (None for trailing literal text).
    requested = {name for _, name, _, _ in formatter.parse(fmt)}
    # Units ordered largest-first so each divmod consumes what it can.
    unit_sizes = (('W', 604800), ('D', 86400), ('H', 3600), ('M', 60), ('S', 1))
    parts = {}
    for unit, size in unit_sizes:
        if unit not in requested:
            continue
        parts[unit], seconds_left = divmod(seconds_left, size)
    return formatter.format(fmt, **parts)


def convert_to_mp3(wav_file):
    """Convert ``wav_file`` to .mp3 with ffmpeg in the background.

    The conversion runs on a separate thread so the caller can continue
    immediately. When ffmpeg exits successfully the source .wav file is
    deleted; on any failure the .wav is kept and a message is printed.
    If ffmpeg is not on PATH a message is printed and nothing else happens.

    Args:
        wav_file: Path of the .wav file to convert. The .mp3 is written
            next to it with the final suffix replaced by ``.mp3``.

    Returns:
        None.
    """
    # Imported locally so this function does not rely on a module-level
    # import that the rest of the file does not otherwise need.
    import threading

    if shutil.which('ffmpeg') is None:
        print('ffmpeg not found. Please install ffmpeg to convert .wav files to .mp3')
        return
    # with_suffix swaps only the trailing extension; str.replace would also
    # rewrite a '.wav' occurring earlier in the file name.
    mp3_file = str(Path(wav_file).with_suffix('.mp3'))
    print(f'Converting {wav_file} to {mp3_file}...')

    def _convert_and_cleanup():
        # No shell is involved, so the cleanup cannot be expressed as
        # '&& rm ...' in the argument list (ffmpeg would treat those tokens
        # as additional output files). Run ffmpeg, then delete in Python.
        try:
            result = subprocess.run(
                ['ffmpeg', '-i', wav_file, mp3_file],
                stdin=subprocess.DEVNULL,   # never block on an interactive prompt
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,     # run() drains the pipe, so it cannot fill up
            )
        except OSError as err:
            print(f'Could not run ffmpeg for {wav_file}: {err}')
            return
        if result.returncode != 0:
            error_lines = result.stderr.decode(errors='replace').strip().splitlines()
            detail = error_lines[-1] if error_lines else 'no error output'
            print(f'ffmpeg failed for {wav_file} (exit code {result.returncode}): {detail}')
            return
        try:
            Path(wav_file).unlink()
        except FileNotFoundError:
            pass  # already removed; nothing left to clean up

    # Non-daemon thread: the interpreter waits for it at exit, so the last
    # chapter's conversion is not cut short when the main loop finishes.
    threading.Thread(target=_convert_and_cleanup).start()


if __name__ == '__main__':
    # Command-line entry point: build the option parser from the voices the
    # loaded Kokoro engine reports, then hand the parsed options to main().
    voices = list(kokoro.get_voices())
    parser = argparse.ArgumentParser(
        epilog='example:\n  audiblez book.epub -l en-us -v af_sky',
        # Raw formatter keeps the line break and indentation of the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('epub_file_path', help='Path to the epub file')
    parser.add_argument(
        '-l', '--lang',
        default='en-gb',
        help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn',
    )
    parser.add_argument(
        '-v', '--voice',
        default=voices[0],  # first voice reported by the engine
        help='Choose narrating voice: ' + ', '.join(voices),
    )
    parser.add_argument(
        '-w', '--wav',
        action='store_true',
        help="Don't convert to .mp3, just create .wav files",
    )
    # Kept as a module-level name: main() consults ``args.wav``.
    args = parser.parse_args()
    main(args.epub_file_path, args.lang, args.voice)
init 2025-01-14 15:35:10 +01:00			`# audiblez - A program to convert e-books into audiobooks using high-quality`
			`# Kokoro-82M text-to-speech model.`
			`# Distributed under the MIT License for educational purposes.`
			`# by Claudio Santini (2025) - https://claudio.uk`

			`import argparse`
			`import time`
			`import shutil`
			`import subprocess`
			`import soundfile as sf`
			`import ebooklib`
			`import warnings`
			`from pathlib import Path`
			`from string import Formatter`
			`from bs4 import BeautifulSoup`
			`from kokoro_onnx import Kokoro`
			`from ebooklib import epub`

			`kokoro = Kokoro('kokoro-v0_19.onnx', 'voices.json')`


			`def main(file_path, lang, voice):`
			`file_name = Path(file_path).name`
			`with warnings.catch_warnings():`
			`book = epub.read_epub(file_path)`
			`title = book.get_metadata('DC', 'title')[0][0]`
			`creator = book.get_metadata('DC', 'creator')[0][0]`
			`intro = f'{title} by {creator}'`
			`print(intro)`
			`chapters = find_chapters(book)`
			`print([c.get_name() for c in chapters])`
			`texts = extract_texts(chapters)`
			`has_ffmpeg = shutil.which('ffmpeg') is not None`
			`use_fmmpeg = has_ffmpeg and not args.wav`

			`i = 1`
			`for text in texts:`
			`chapter_filename = file_name.lower().replace('.epub', f'_chapter_{i}.wav')`
			`if Path(chapter_filename).exists() or Path(chapter_filename.replace('.wav', '.mp3')).exists():`
			`print(f'File for chapter {i} already exists. Skipping')`
			`i += 1`
			`continue`
			`print(f'Reading chapter {i} ({len(text):,} characters)...')`
			`start_time = time.time()`
			`samples, sample_rate = kokoro.create(text, voice=voice, speed=1.0, lang=lang)`
			`sf.write(f'{chapter_filename}', samples, sample_rate)`
			`end_time = time.time()`
			`delta_seconds = end_time - start_time`
			`charters_per_second = len(text) / delta_seconds`
			`print('Chapter written to', chapter_filename)`
			`print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({charters_per_second:.0f} charters per second')`
			`remaining_characters = sum([len(t) for t in texts[i - 1:]])`
			`remaining_time = remaining_characters / charters_per_second`
			`print(f'Estimated time remaining: {strfdelta(remaining_time)}')`
			`print()`
			`if use_fmmpeg:`
			`print(f'In parallel, converting chapter {i} to mp3...')`
			`convert_to_mp3(chapter_filename)`
			`i += 1`


			`def extract_texts(chapters):`
			`texts = []`
			`for chapter in chapters:`
			`xml = chapter.get_body_content()`
			`soup = BeautifulSoup(xml, features='lxml')`
			`chapter_text = ''`
			`html_content_tags = ['title', 'p', 'h1', 'h2', 'h3', 'h4']`
			`for child in soup.find_all(html_content_tags):`
			`inner_text = child.text.strip() if child.text else ""`
			`if inner_text:`
			`chapter_text += inner_text + '\n'`
			`texts.append(chapter_text)`
			`return texts`


			`def find_chapters(book, verbose=False):`
			`is_chapter = lambda c: 'chapter' in c.get_name().lower() or 'part' in c.get_name().lower()`
			`chapters = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT and is_chapter(c)]`
			`if verbose:`
			`for item in book.get_items():`
			`if item.get_type() == ebooklib.ITEM_DOCUMENT:`
			`print((item.get_name(), len(item.get_body_content()), 'YES' if item in chapters else '-'))`
			`return chapters`


			`def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):`
			`remainder = int(tdelta)`
			`f = Formatter()`
			`desired_fields = [field_tuple[1] for field_tuple in f.parse(fmt)]`
			`possible_fields = ('W', 'D', 'H', 'M', 'S')`
			`constants = {'W': 604800, 'D': 86400, 'H': 3600, 'M': 60, 'S': 1}`
			`values = {}`
			`for field in possible_fields:`
			`if field in desired_fields and field in constants:`
			`values[field], remainder = divmod(remainder, constants[field])`
			`return f.format(fmt, **values)`


			`def convert_to_mp3(wav_file):`
			`if shutil.which('ffmpeg') is None:`
			`print('ffmpeg not found. Please install ffmpeg to convert .wav files to .mp3')`
			`return`
			`mp3_file = wav_file.replace('.wav', '.mp3')`
			`print(f'Converting {wav_file} to {mp3_file}...')`
			`subprocess.Popen(['ffmpeg', '-i', wav_file, mp3_file, ' && rm ', wav_file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)`


			`if __name__ == '__main__':`
			`voices = list(kokoro.get_voices())`
			`voices_str = ', '.join(voices)`
			`epilog = 'example:\n' + \`
			`' audiblez book.epub -l en-us -v af_sky'`
			`parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)`
			`parser.add_argument('epub_file_path', help='Path to the epub file')`
			`parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')`
			`parser.add_argument('-v', '--voice', default=voices[0], help=f'Choose narrating voice: {voices_str}')`
			`parser.add_argument('-w', '--wav', help="Don't convert to .mp3, just create .wav files", action='store_true')`
			`args = parser.parse_args()`
			`main(args.epub_file_path, args.lang, args.voice)`