audiblez/audiblez.py

141 lines
5.6 KiB
Python
Raw Normal View History

2025-01-14 17:45:04 +01:00
#!/usr/bin/env python3
2025-01-14 15:35:10 +01:00
# audiblez - A program to convert e-books into audiobooks using high-quality
# Kokoro-82M text-to-speech model.
# Distributed under the MIT License for educational purposes.
# by Claudio Santini (2025) - https://claudio.uk
import argparse
2025-01-14 17:45:04 +01:00
import sys
2025-01-14 15:35:10 +01:00
import time
import shutil
import subprocess
import soundfile as sf
import ebooklib
import warnings
from pathlib import Path
from string import Formatter
from bs4 import BeautifulSoup
from kokoro_onnx import Kokoro
from ebooklib import epub
# Load the Kokoro text-to-speech model once, at import time. Both arguments are
# relative file names (presumably resolved against the current working
# directory -- confirm against kokoro_onnx), so the model and voices files must
# be reachable from wherever the script is started.
kokoro = Kokoro('kokoro-v0_19.onnx', 'voices.json')
def _first_metadata_value(book, name, default=None):
    """Return the first Dublin Core ``name`` value of ``book``, or ``default``."""
    entries = book.get_metadata('DC', name)
    if entries and entries[0] and entries[0][0]:
        return entries[0][0]
    return default


def main(file_path, lang, voice):
    """Convert the EPUB at ``file_path`` into one audio file per chapter.

    Each chapter is synthesised with the module-level ``kokoro`` model and
    written as ``<book>_chapter_<n>.wav`` in the current directory. When
    ffmpeg is on the PATH, every wav is handed to ``convert_to_mp3`` and
    ``create_m4b`` is called once all chapters have been processed.
    Chapters whose wav or mp3 file already exists are skipped, so an
    interrupted run can be resumed.

    Args:
        file_path: Path to the ``.epub`` file.
        lang: Language code forwarded to ``kokoro.create``.
        voice: Voice name forwarded to ``kokoro.create``.
    """
    filename = Path(file_path).name
    # Derive output names from the stem rather than str.replace('.epub', ...):
    # replace() leaves names such as 'BOOK.EPUB' untouched, which made the
    # "already exists" check below match the book itself and skip everything.
    base_name = Path(filename).stem

    with warnings.catch_warnings():
        # catch_warnings() only saves/restores the filter state; the filter
        # below is what actually silences warnings raised while parsing.
        warnings.simplefilter('ignore')
        book = epub.read_epub(file_path)

    # Metadata may be absent; fall back instead of raising IndexError.
    title = _first_metadata_value(book, 'title', base_name)
    creator = _first_metadata_value(book, 'creator')
    intro = f'{title} by {creator}' if creator else f'{title}'
    print(intro)

    chapters = find_chapters(book)
    print([c.get_name() for c in chapters])
    texts = extract_texts(chapters)

    has_ffmpeg = shutil.which('ffmpeg') is not None
    if not has_ffmpeg:
        print('\033[91m' + 'ffmpeg not found. Please install ffmpeg to create mp3 and m4b audiobook files.' + '\033[0m')

    total_chars = sum(len(t) for t in texts)
    print('Started at:', time.strftime('%H:%M:%S'))
    print(f'Total characters: {total_chars:,}')
    # split() without arguments also breaks on the newlines between paragraphs.
    print('Total words:', len(' '.join(texts).split()))

    for i, text in enumerate(texts, start=1):
        chapter_filename = f'{base_name}_chapter_{i}.wav'
        wav_path = Path(chapter_filename)
        if wav_path.exists() or wav_path.with_suffix('.mp3').exists():
            print(f'File for chapter {i} already exists. Skipping')
            continue

        print(f'Reading chapter {i} ({len(text):,} characters)...')
        if i == 1:
            # Announce title and author at the start of the first chapter.
            text = intro + '.\n\n' + text

        start_time = time.time()
        samples, sample_rate = kokoro.create(text, voice=voice, speed=1.0, lang=lang)
        sf.write(chapter_filename, samples, sample_rate)
        delta_seconds = time.time() - start_time
        print('Chapter written to', chapter_filename)

        # Skip the statistics when they would divide by zero (empty chapter
        # text or a zero-length timing interval).
        if text and delta_seconds > 0:
            chars_per_sec = len(text) / delta_seconds
            print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)')
            # i is 1-based, so texts[i:] holds only the chapters still to do.
            remaining_chars = sum(len(t) for t in texts[i:])
            remaining_time = remaining_chars / chars_per_sec
            print(f'Estimated time remaining: {strfdelta(remaining_time)}')
        print()

        if has_ffmpeg:
            convert_to_mp3(chapter_filename)

    if has_ffmpeg:
        # NOTE(review): convert_to_mp3 does not block, so the last conversions
        # may still be running when create_m4b starts -- confirm whether a
        # wait is needed here.
        create_m4b(filename)
2025-01-14 15:35:10 +01:00
def extract_texts(chapters):
    """Return the plain text of each chapter, one string per chapter.

    For every item in ``chapters`` the body markup is parsed and the text of
    each title, paragraph and h1-h4 heading element is collected in document
    order. Elements whose stripped text is empty are dropped; every kept
    element contributes its text followed by a newline.
    """
    html_content_tags = ['title', 'p', 'h1', 'h2', 'h3', 'h4']
    texts = []
    for chapter in chapters:
        soup = BeautifulSoup(chapter.get_body_content(), features='lxml')
        stripped = ((node.text or '').strip() for node in soup.find_all(html_content_tags))
        texts.append(''.join(line + '\n' for line in stripped if line))
    return texts
def find_chapters(book, verbose=False):
    """Return the document items of ``book`` that look like chapters.

    A document counts as a chapter when its item name contains "chapter" or
    "part" (case-insensitive). If no document matches, every document item
    is returned instead, so books with differently named files are still
    converted rather than silently producing nothing.

    Args:
        book: The parsed EPUB book; only ``get_items()`` is used.
        verbose: When true, print one ``(name, body length, 'YES' | '-')``
            tuple per document showing whether it was selected.

    Returns:
        A list of the selected document items, in the order ``get_items()``
        yields them.
    """
    def looks_like_chapter(item):
        name = item.get_name().lower()
        return 'chapter' in name or 'part' in name

    # Scan the items once; both the selection and the verbose report use this.
    documents = [item for item in book.get_items() if item.get_type() == ebooklib.ITEM_DOCUMENT]
    chapters = [doc for doc in documents if looks_like_chapter(doc)]
    if not chapters and documents:
        print('No chapter-like documents found, defaulting to all available documents.')
        chapters = list(documents)

    if verbose:
        for item in documents:
            print((item.get_name(), len(item.get_body_content()), 'YES' if item in chapters else '-'))
    return chapters
def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
    """Format a duration using the unit fields named in ``fmt``.

    Args:
        tdelta: Duration as a number of seconds (fractions are truncated) or
            as a ``datetime.timedelta``.
        fmt: ``str.format``-style template. The fields ``W``, ``D``, ``H``,
            ``M`` and ``S`` (weeks, days, hours, minutes, seconds) are
            supported; a unit that is not named in ``fmt`` rolls over into
            the next smaller unit that is.

    Returns:
        The formatted string. Negative durations are formatted from their
        absolute value and prefixed with ``-``.
    """
    from datetime import timedelta

    if isinstance(tdelta, timedelta):
        tdelta = tdelta.total_seconds()
    total_seconds = int(tdelta)

    # divmod() floors, so a negative remainder would yield e.g. "-1d 23h 59m
    # 59s" for -1 second; work on the magnitude and re-attach the sign.
    sign = '-' if total_seconds < 0 else ''
    remainder = abs(total_seconds)

    formatter = Formatter()
    desired_fields = {field_name for _, field_name, _, _ in formatter.parse(fmt)}
    # Largest unit first so each divmod consumes what the bigger units left.
    units = (('W', 604800), ('D', 86400), ('H', 3600), ('M', 60), ('S', 1))
    values = {}
    for unit, seconds_per_unit in units:
        if unit in desired_fields:
            values[unit], remainder = divmod(remainder, seconds_per_unit)
    return sign + formatter.format(fmt, **values)
def convert_to_mp3(wav_file):
    """Convert ``wav_file`` to MP3 with ffmpeg without blocking the caller.

    The conversion runs on a background thread. When ffmpeg succeeds the
    source wav file is deleted; when it fails the wav file is kept and a
    message is printed.

    Args:
        wav_file: Path of the wav file to convert. The MP3 is written next
            to it with the ``.mp3`` suffix.

    Returns:
        The started ``threading.Thread``; callers may ``join()`` it to wait
        for the conversion, or ignore it.
    """
    import threading

    mp3_file = str(Path(wav_file).with_suffix('.mp3'))
    print(f'In parallel, converting {wav_file} to {mp3_file}...')

    def _convert_then_cleanup():
        # Shell operators such as '&&' have no meaning in an argument list
        # (no shell is involved), so the follow-up steps are done in Python.
        try:
            result = subprocess.run(
                ['ffmpeg', '-y', '-i', str(wav_file), mp3_file],
                # Nothing reads ffmpeg's output; discard it rather than pipe
                # it, and detach stdin so ffmpeg can never wait for input.
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except OSError as err:
            print(f'Could not run ffmpeg for {wav_file}: {err}')
            return
        if result.returncode != 0:
            print(f'ffmpeg failed to convert {wav_file} (exit code {result.returncode}); keeping the wav file.')
            return
        try:
            Path(wav_file).unlink()
        except FileNotFoundError:
            pass
        print('mp3 conversion done.')

    worker = threading.Thread(target=_convert_then_cleanup)
    worker.start()
    return worker
def create_m4b(filename):
    """Join the chapter MP3 files of a book into a single ``.m4b`` file.

    Chapter files are expected next to ``filename`` and named
    ``<book stem>_chapter_<n>.mp3``; they are joined in numeric chapter
    order. Does nothing when ffmpeg is not on the PATH or when no chapter
    file is found.

    Args:
        filename: Name (or path) of the source ``.epub`` file; the output is
            the same path with the suffix replaced by ``.m4b``.
    """
    import re
    import tempfile

    if shutil.which('ffmpeg') is None:
        return
    print('Creating M4B file...')

    book_path = Path(filename)
    # with_suffix() always yields a name different from the input, unlike
    # str.replace('.epub', ...), which is a no-op for other suffixes.
    filename_m4b = str(book_path.with_suffix('.m4b'))

    # ffmpeg does not expand wildcards, so the chapter files are collected
    # here and sorted by chapter number (chapter 10 must follow chapter 9).
    chapter_pattern = re.compile(re.escape(book_path.stem) + r'_chapter_(\d+)\.mp3')
    numbered = []
    for candidate in book_path.parent.iterdir():
        match = chapter_pattern.fullmatch(candidate.name)
        if match:
            numbered.append((int(match.group(1)), candidate))
    if not numbered:
        print('No chapter mp3 files found. M4B file not created.')
        return
    numbered.sort(key=lambda pair: pair[0])

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Input list for ffmpeg's concat demuxer: one "file '<path>'" line per
        # chapter, with embedded single quotes written as '\''.
        list_file = Path(tmp_dir) / 'chapters.txt'
        lines = []
        for _, mp3_path in numbered:
            escaped = str(mp3_path.resolve()).replace("'", "'\\''")
            lines.append(f"file '{escaped}'\n")
        list_file.write_text(''.join(lines), encoding='utf-8')
        # Re-encode to AAC: NOTE(review) the m4b container is not expected to
        # accept a straight copy of the MP3 stream -- confirm with ffmpeg.
        result = subprocess.run([
            'ffmpeg', '-f', 'concat', '-safe', '0', '-i', str(list_file),
            '-c:a', 'aac', filename_m4b,
        ])

    if result.returncode == 0:
        print(f'{filename_m4b} created. Enjoy your audiobook.')
    else:
        print(f'ffmpeg failed to create {filename_m4b} (exit code {result.returncode}).')
2025-01-14 15:35:10 +01:00
if __name__ == '__main__':
    # Command-line entry point: build the parser from the voices the loaded
    # model offers, then hand the parsed arguments to main().
    voices = list(kokoro.get_voices())
    if 'af_sky' in voices:
        default_voice = 'af_sky'
    else:
        default_voice = voices[0]
    voices_str = ', '.join(voices)

    parser = argparse.ArgumentParser(
        epilog='example:\n audiblez book.epub -l en-us -v af_sky',
        # Keep the line break of the epilog as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('epub_file_path', help='Path to the epub file')
    parser.add_argument(
        '-l', '--lang',
        default='en-gb',
        help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn',
    )
    parser.add_argument(
        '-v', '--voice',
        default=default_voice,
        help=f'Choose narrating voice: {voices_str}',
    )
    # NOTE(review): --wav is accepted but its value is not passed to main().
    parser.add_argument(
        '-w', '--wav',
        action='store_true',
        help="Output to .wav files (instead of mp3, which the default)",
    )

    # Invoked without any argument: show the help text and exit with status 1.
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        raise SystemExit(1)

    cli_args = parser.parse_args()
    main(cli_args.epub_file_path, cli_args.lang, cli_args.voice)