audiblez/audiblez.py

#!/usr/bin/env python3
# audiblez - A program to convert e-books into audiobooks using high-quality
# Kokoro-82M text-to-speech model.
# Distributed under the MIT License for educational purposes.
# by Claudio Santini (2025) - https://claudio.uk

import argparse
import shutil
import subprocess
import sys
import tempfile
import time
import warnings
from pathlib import Path
from string import Formatter

import ebooklib
import soundfile as sf
from bs4 import BeautifulSoup
from ebooklib import epub
from kokoro_onnx import Kokoro

# Module-level Kokoro text-to-speech instance used by main() and by the CLI
# entry point below. It is constructed as soon as this module is loaded, from
# the files 'kokoro-v0_19.onnx' and 'voices.json'.
# NOTE(review): both paths are relative, so they are presumably resolved
# against the current working directory — confirm against kokoro_onnx.
kokoro = Kokoro('kokoro-v0_19.onnx', 'voices.json')


def main(file_path, lang, voice):
    """Convert the e-book at ``file_path`` into one audio file per chapter.

    Each chapter is synthesised with the module-level ``kokoro`` instance and
    written to ``<book>_chapter_<n>.wav`` in the current working directory.
    When ffmpeg is available every chapter is also passed to
    ``convert_to_mp3`` and, once all chapters are done, ``create_m4b`` is
    called. Chapters whose wav or mp3 file already exists are skipped, so an
    interrupted run can be resumed.

    Args:
        file_path: Path to the epub file to read.
        lang: Language code forwarded to ``kokoro.create``.
        voice: Voice name forwarded to ``kokoro.create``.
    """
    filename = Path(file_path).name
    # Build output names from the stem rather than by replacing '.epub': for an
    # input such as 'book.EPUB' the replace would leave the name unchanged and
    # the chapter path would collide with the book file itself.
    stem = Path(filename).stem
    with warnings.catch_warnings():
        # catch_warnings() alone only restores the filters on exit; a filter
        # must be installed to actually silence warnings raised while parsing.
        warnings.simplefilter('ignore')
        book = epub.read_epub(file_path)

    # Metadata entries may be absent; fall back instead of raising IndexError.
    titles = book.get_metadata('DC', 'title')
    creators = book.get_metadata('DC', 'creator')
    title = titles[0][0] if titles else stem
    intro = f'{title} by {creators[0][0]}' if creators else title
    print(intro)

    chapters = find_chapters(book)
    print([c.get_name() for c in chapters])
    texts = extract_texts(chapters)
    has_ffmpeg = shutil.which('ffmpeg') is not None
    if not has_ffmpeg:
        print('\033[91m' + 'ffmpeg not found. Please install ffmpeg to create mp3 and m4b audiobook files.' + '\033[0m')
    total_chars = sum(len(t) for t in texts)
    print('Started at:', time.strftime('%H:%M:%S'))
    print(f'Total characters: {total_chars:,}')
    # split() without arguments also splits on the newlines that separate
    # paragraphs, which split(' ') would count as part of a single word.
    print('Total words:', len(' '.join(texts).split()))

    for i, text in enumerate(texts, start=1):
        chapter_filename = f'{stem}_chapter_{i}.wav'
        mp3_filename = f'{stem}_chapter_{i}.mp3'
        if Path(chapter_filename).exists() or Path(mp3_filename).exists():
            print(f'File for chapter {i} already exists. Skipping')
            continue
        print(f'Reading chapter {i} ({len(text):,} characters)...')
        if i == 1:
            # The first chapter is prefixed with a spoken "<title> by <author>".
            text = intro + '.\n\n' + text
        start_time = time.time()
        samples, sample_rate = kokoro.create(text, voice=voice, speed=1.0, lang=lang)
        sf.write(chapter_filename, samples, sample_rate)
        delta_seconds = time.time() - start_time
        # Guard against a zero elapsed time on coarse clocks.
        chars_per_sec = len(text) / delta_seconds if delta_seconds > 0 else 0.0
        print('Chapter written to', chapter_filename)
        print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)')
        # i is 1-based, so texts[i:] is exactly the chapters still to be read;
        # texts[i - 1:] would count the chapter that was just finished again.
        remaining_chars = sum(len(t) for t in texts[i:])
        if chars_per_sec > 0:
            remaining_time = remaining_chars / chars_per_sec
            print(f'Estimated time remaining: {strfdelta(remaining_time)}')
        print()
        if has_ffmpeg:
            convert_to_mp3(chapter_filename)
    if has_ffmpeg:
        create_m4b(filename)


def extract_texts(chapters):
    """Return the readable text of every chapter, one string per chapter.

    For each chapter the body content is parsed with BeautifulSoup (lxml
    parser) and the stripped text of every title, paragraph and h1-h4 heading
    element is collected, each followed by a newline. Elements whose text is
    empty after stripping are left out.
    """
    content_tags = ['title', 'p', 'h1', 'h2', 'h3', 'h4']

    def chapter_to_text(chapter):
        # Flatten one chapter document into newline-terminated text lines.
        soup = BeautifulSoup(chapter.get_body_content(), features='lxml')
        lines = []
        for node in soup.find_all(content_tags):
            stripped = (node.text or "").strip()
            if stripped:
                lines.append(stripped + '\n')
        return ''.join(lines)

    return [chapter_to_text(chapter) for chapter in chapters]


def find_chapters(book, verbose=False):
    """Return the document items of ``book`` whose name looks like a chapter.

    An item is selected when it is of type ``ebooklib.ITEM_DOCUMENT`` and its
    lower-cased name contains 'chapter' or 'part'. With ``verbose`` set, a
    tuple of (name, body length, 'YES' or '-') is printed for every document
    item, showing whether it was selected.
    """

    def looks_like_chapter(item):
        lowered = item.get_name().lower()
        return 'chapter' in lowered or 'part' in lowered

    selected = []
    for item in book.get_items():
        if item.get_type() != ebooklib.ITEM_DOCUMENT:
            continue
        if looks_like_chapter(item):
            selected.append(item)

    if not verbose:
        return selected

    # Diagnostic listing of every document and whether it was picked.
    for item in book.get_items():
        if item.get_type() != ebooklib.ITEM_DOCUMENT:
            continue
        marker = 'YES' if item in selected else '-'
        print((item.get_name(), len(item.get_body_content()), marker))
    return selected


def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
    """Format a duration given in seconds according to ``fmt``.

    ``fmt`` is a ``str.format`` template that may use the fields W (weeks),
    D (days), H (hours), M (minutes) and S (seconds). ``tdelta`` is truncated
    to an integer and split across only the fields present in ``fmt``, from
    the largest unit to the smallest, so a unit that is left out rolls over
    into the next smaller one that is present.
    """
    formatter = Formatter()
    requested = {name for _, name, _, _ in formatter.parse(fmt)}
    seconds_left = int(tdelta)
    parts = {}
    unit_sizes = (('W', 604800), ('D', 86400), ('H', 3600), ('M', 60), ('S', 1))
    for unit, size in unit_sizes:
        if unit not in requested:
            continue
        parts[unit], seconds_left = divmod(seconds_left, size)
    return formatter.format(fmt, **parts)


def convert_to_mp3(wav_file):
    """Convert ``wav_file`` to an mp3 next to it and delete the wav on success.

    ffmpeg is run directly (no shell), so shell operators such as ``&&``
    cannot be used to chain the clean-up; the wav file is removed from Python
    once ffmpeg has exited successfully. The call blocks until ffmpeg has
    finished, so the mp3 is complete when this function returns.

    Args:
        wav_file: Path of the wav file to convert.

    Returns:
        None. Failures are reported on stdout and leave the wav file in place.
    """
    mp3_file = str(Path(wav_file).with_suffix('.mp3'))
    print(f'Converting {wav_file} to {mp3_file}...')
    # -nostdin / -y: ffmpeg's output is discarded below, so it must never wait
    # for an interactive answer (e.g. an overwrite prompt) nobody can see.
    command = ['ffmpeg', '-nostdin', '-y', '-i', wav_file, mp3_file]
    try:
        # DEVNULL instead of PIPE: nothing reads the output, and an undrained
        # pipe can fill up and stall the child process.
        result = subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except OSError as error:
        print(f'Could not run ffmpeg: {error}')
        return
    if result.returncode != 0:
        print(f'ffmpeg failed to convert {wav_file} (exit code {result.returncode}). Keeping the wav file.')
        return
    Path(wav_file).unlink()
    print('mp3 conversion done.')


def _chapter_mp3_files(filename):
    """Return the ``<book>_chapter_<n>.mp3`` files for ``filename`` in chapter order."""
    book = Path(filename)
    prefix = f'{book.stem}_chapter_'
    numbered = []
    for candidate in book.parent.iterdir():
        name = candidate.name
        if not (name.startswith(prefix) and name.endswith('.mp3')):
            continue
        number = name[len(prefix):-len('.mp3')]
        if number.isdecimal():
            numbered.append((int(number), candidate))
    # Sort numerically so that chapter 10 follows chapter 9, not chapter 1.
    numbered.sort(key=lambda pair: pair[0])
    return [path for _, path in numbered]


def create_m4b(filename):
    """Join the chapter mp3 files of ``filename`` into a single .m4b audiobook.

    The chapter files are looked up by name in the directory part of
    ``filename`` (the current directory for a bare file name) and handed to
    ffmpeg's concat demuxer through a temporary list file. ffmpeg does not
    expand wildcards such as ``chapter_*.mp3`` itself, and no shell is
    involved to do it, so the files have to be listed explicitly.

    Args:
        filename: Name of the source e-book; the output uses the same name
            with an ``.m4b`` suffix.

    Returns:
        None. Nothing is done when ffmpeg is missing or no chapter mp3 exists.
    """
    if shutil.which('ffmpeg') is None:
        return
    chapter_files = _chapter_mp3_files(filename)
    if not chapter_files:
        print('No chapter mp3 files found. Skipping M4B creation.')
        return
    print('Creating M4B file...')
    filename_m4b = str(Path(filename).with_suffix('.m4b'))
    with tempfile.TemporaryDirectory() as tmp_dir:
        list_file = Path(tmp_dir) / 'chapters.txt'
        entries = []
        for chapter_file in chapter_files:
            # Absolute paths: relative entries would be resolved against the
            # list file's directory. A single quote inside the quoted path is
            # written as '\'' per ffmpeg's quoting rules.
            quoted = chapter_file.resolve().as_posix().replace("'", "'\\''")
            entries.append(f"file '{quoted}'\n")
        list_file.write_text(''.join(entries), encoding='utf-8')
        # Re-encode to AAC: stream-copying mp3 audio into an .m4b container is
        # rejected by ffmpeg's muxer for that format.
        result = subprocess.run([
            'ffmpeg', '-f', 'concat', '-safe', '0', '-i', str(list_file),
            '-c:a', 'aac', filename_m4b,
        ])
    if result.returncode != 0:
        print(f'ffmpeg failed to create {filename_m4b} (exit code {result.returncode}).')
        return
    print(f'{filename_m4b} created. Enjoy your audiobook.')


if __name__ == '__main__':
    # Command-line entry point: parse the arguments and start the conversion.
    available_voices = list(kokoro.get_voices())
    # Prefer 'af_sky' as the default narrator when the model provides it.
    if 'af_sky' in available_voices:
        fallback_voice = 'af_sky'
    else:
        fallback_voice = available_voices[0]
    voice_help = 'Choose narrating voice: ' + ', '.join(available_voices)

    cli = argparse.ArgumentParser(
        epilog='example:\n  audiblez book.epub -l en-us -v af_sky',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('epub_file_path', help='Path to the epub file')
    cli.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
    cli.add_argument('-v', '--voice', default=fallback_voice, help=voice_help)
    # NOTE(review): --wav is parsed here but never forwarded to main(), so it
    # currently has no effect.
    cli.add_argument('-w', '--wav', action='store_true', help="Output to .wav files (instead of mp3, which the default)")

    # Invoked without any argument: show the usage text and exit with an error.
    if len(sys.argv) == 1:
        cli.print_help(sys.stderr)
        sys.exit(1)

    options = cli.parse_args()
    main(options.epub_file_path, options.lang, options.voice)
m4b 2025-01-14 17:45:04 +01:00			`#!/usr/bin/env python3`
init 2025-01-14 15:35:10 +01:00			`# audiblez - A program to convert e-books into audiobooks using high-quality`
			`# Kokoro-82M text-to-speech model.`
			`# Distributed under the MIT License for educational purposes.`
			`# by Claudio Santini (2025) - https://claudio.uk`

			`import argparse`
m4b 2025-01-14 17:45:04 +01:00			`import sys`
init 2025-01-14 15:35:10 +01:00			`import time`
			`import shutil`
			`import subprocess`
			`import soundfile as sf`
			`import ebooklib`
			`import warnings`
			`from pathlib import Path`
			`from string import Formatter`
			`from bs4 import BeautifulSoup`
			`from kokoro_onnx import Kokoro`
			`from ebooklib import epub`

			`kokoro = Kokoro('kokoro-v0_19.onnx', 'voices.json')`


			`def main(file_path, lang, voice):`
m4b 2025-01-14 17:45:04 +01:00			`filename = Path(file_path).name`
init 2025-01-14 15:35:10 +01:00			`with warnings.catch_warnings():`
			`book = epub.read_epub(file_path)`
			`title = book.get_metadata('DC', 'title')[0][0]`
			`creator = book.get_metadata('DC', 'creator')[0][0]`
			`intro = f'{title} by {creator}'`
			`print(intro)`
			`chapters = find_chapters(book)`
			`print([c.get_name() for c in chapters])`
			`texts = extract_texts(chapters)`
			`has_ffmpeg = shutil.which('ffmpeg') is not None`
m4b 2025-01-14 17:45:04 +01:00			`if not has_ffmpeg:`
			`print('\033[91m' + 'ffmpeg not found. Please install ffmpeg to create mp3 and m4b audiobook files.' + '\033[0m')`
			`total_chars = sum([len(t) for t in texts])`
			`print('Started at:', time.strftime('%H:%M:%S'))`
			`print(f'Total characters: {total_chars:,}')`
			`print('Total words:', len(' '.join(texts).split(' ')))`
init 2025-01-14 15:35:10 +01:00
			`i = 1`
			`for text in texts:`
m4b 2025-01-14 17:45:04 +01:00			`chapter_filename = filename.replace('.epub', f'_chapter_{i}.wav')`
init 2025-01-14 15:35:10 +01:00			`if Path(chapter_filename).exists() or Path(chapter_filename.replace('.wav', '.mp3')).exists():`
			`print(f'File for chapter {i} already exists. Skipping')`
			`i += 1`
			`continue`
			`print(f'Reading chapter {i} ({len(text):,} characters)...')`
m4b 2025-01-14 17:45:04 +01:00			`if i == 1:`
			`text = intro + '.\n\n' + text`
init 2025-01-14 15:35:10 +01:00			`start_time = time.time()`
			`samples, sample_rate = kokoro.create(text, voice=voice, speed=1.0, lang=lang)`
			`sf.write(f'{chapter_filename}', samples, sample_rate)`
			`end_time = time.time()`
			`delta_seconds = end_time - start_time`
m4b 2025-01-14 17:45:04 +01:00			`chars_per_sec = len(text) / delta_seconds`
init 2025-01-14 15:35:10 +01:00			`print('Chapter written to', chapter_filename)`
m4b 2025-01-14 17:45:04 +01:00			`print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)')`
			`remaining_chars = sum([len(t) for t in texts[i - 1:]])`
			`remaining_time = remaining_chars / chars_per_sec`
init 2025-01-14 15:35:10 +01:00			`print(f'Estimated time remaining: {strfdelta(remaining_time)}')`
			`print()`
m4b 2025-01-14 17:45:04 +01:00			`if has_ffmpeg:`
init 2025-01-14 15:35:10 +01:00			`convert_to_mp3(chapter_filename)`
			`i += 1`
m4b 2025-01-14 17:45:04 +01:00			`if has_ffmpeg:`
			`create_m4b(filename)`
init 2025-01-14 15:35:10 +01:00

			`def extract_texts(chapters):`
			`texts = []`
			`for chapter in chapters:`
			`xml = chapter.get_body_content()`
			`soup = BeautifulSoup(xml, features='lxml')`
			`chapter_text = ''`
			`html_content_tags = ['title', 'p', 'h1', 'h2', 'h3', 'h4']`
			`for child in soup.find_all(html_content_tags):`
			`inner_text = child.text.strip() if child.text else ""`
			`if inner_text:`
			`chapter_text += inner_text + '\n'`
			`texts.append(chapter_text)`
			`return texts`


			`def find_chapters(book, verbose=False):`
			`is_chapter = lambda c: 'chapter' in c.get_name().lower() or 'part' in c.get_name().lower()`
			`chapters = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT and is_chapter(c)]`
			`if verbose:`
			`for item in book.get_items():`
			`if item.get_type() == ebooklib.ITEM_DOCUMENT:`
			`print((item.get_name(), len(item.get_body_content()), 'YES' if item in chapters else '-'))`
			`return chapters`


			`def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):`
			`remainder = int(tdelta)`
			`f = Formatter()`
			`desired_fields = [field_tuple[1] for field_tuple in f.parse(fmt)]`
			`possible_fields = ('W', 'D', 'H', 'M', 'S')`
			`constants = {'W': 604800, 'D': 86400, 'H': 3600, 'M': 60, 'S': 1}`
			`values = {}`
			`for field in possible_fields:`
			`if field in desired_fields and field in constants:`
			`values[field], remainder = divmod(remainder, constants[field])`
			`return f.format(fmt, **values)`


			`def convert_to_mp3(wav_file):`
m4b 2025-01-14 17:45:04 +01:00			`mp3_file = wav_file.replace('.wav', '.mp3')`
			`print(f'In parallel, converting {wav_file} to {mp3_file}...')`
			`subprocess.Popen(['ffmpeg', '-i', wav_file, mp3_file, '&& rm', wav_file, '&& echo "mp3 convertion done."'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)`


			`def create_m4b(filename):`
init 2025-01-14 15:35:10 +01:00			`if shutil.which('ffmpeg') is None:`
			`return`
m4b 2025-01-14 17:45:04 +01:00			`print('Creating M4B file...')`
			`filename_m4b = filename.replace('.epub', '.m4b')`
			`subprocess.run(['ffmpeg', '-i', 'concat:chapter_*.mp3', '-acodec', 'copy', f'{filename_m4b}'])`
			`print(f'{filename_m4b} created. Enjoy your audbiook.')`
init 2025-01-14 15:35:10 +01:00

			`if __name__ == '__main__':`
			`voices = list(kokoro.get_voices())`
			`voices_str = ', '.join(voices)`
			`epilog = 'example:\n' + \`
			`' audiblez book.epub -l en-us -v af_sky'`
m4b 2025-01-14 17:45:04 +01:00			`default_voice = 'af_sky' if 'af_sky' in voices else voices[0]`
init 2025-01-14 15:35:10 +01:00			`parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)`
			`parser.add_argument('epub_file_path', help='Path to the epub file')`
			`parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')`
m4b 2025-01-14 17:45:04 +01:00			`parser.add_argument('-v', '--voice', default=default_voice, help=f'Choose narrating voice: {voices_str}')`
			`parser.add_argument('-w', '--wav', help="Output to .wav files (instead of mp3, which the default)", action='store_true')`
			`if len(sys.argv) == 1:`
			`parser.print_help(sys.stderr)`
			`sys.exit(1)`
init 2025-01-14 15:35:10 +01:00			`args = parser.parse_args()`
			`main(args.epub_file_path, args.lang, args.voice)`