2025-01-14 17:45:04 +01:00
#!/usr/bin/env python3
2025-01-14 22:57:31 +01:00
# audiblez - A program to convert e-books into audiobooks using
# Kokoro-82M model for high-quality text-to-speech synthesis.
# by Claudio Santini 2025 - https://claudio.uk
2025-01-14 15:35:10 +01:00
import argparse
2025-01-14 17:45:04 +01:00
import sys
2025-01-14 15:35:10 +01:00
import time
import shutil
import subprocess
import soundfile as sf
import ebooklib
import warnings
2025-01-15 09:31:50 +01:00
import re
2025-01-14 15:35:10 +01:00
from pathlib import Path
from string import Formatter
from bs4 import BeautifulSoup
2025-01-15 23:36:52 +01:00
from kokoro_onnx import config
2025-01-14 15:35:10 +01:00
from kokoro_onnx import Kokoro
from ebooklib import epub
2025-01-14 18:38:26 +01:00
from pydub import AudioSegment
2025-01-15 19:12:48 +01:00
from pick import pick
2025-01-14 15:35:10 +01:00
2025-01-15 23:36:52 +01:00
# Override the MAX_PHONEME_LENGTH setting on kokoro_onnx's config module before
# any Kokoro instance is created.
# NOTE(review): presumably this caps how many phonemes are sent to the model
# per inference call - confirm against the kokoro_onnx documentation.
config . MAX_PHONEME_LENGTH = 128
2025-01-14 15:35:10 +01:00
2025-01-15 19:12:48 +01:00
def main(kokoro, file_path, lang, voice, pick_manually):
    """Read an EPUB aloud chapter by chapter and assemble an audiobook.

    Every non-empty chapter text is synthesized with ``kokoro.create`` and
    written to ``<book>_chapter_<n>.wav`` in the current directory. Chapters
    whose WAV file already exists are skipped, so an interrupted run can be
    resumed. When ffmpeg is on PATH, a chapter index file and the final M4B
    are created afterwards.

    Args:
        kokoro: object exposing ``create(text, voice=, speed=, lang=)`` that
            returns ``(samples, sample_rate)``.
        file_path: path to the EPUB file.
        lang: language code forwarded to ``kokoro.create``.
        voice: voice name forwarded to ``kokoro.create``.
        pick_manually: when true, chapters are chosen through
            ``pick_chapters``; otherwise ``find_chapters`` selects them.
    """
    filename = Path(file_path).name
    # Derive output names from the stem so they can never collide with the
    # input file, even when its extension is not a lowercase ".epub".
    stem = Path(filename).stem
    with warnings.catch_warnings():
        # catch_warnings() alone only saves and restores the filter state;
        # a filter has to be installed for anything to be silenced.
        warnings.simplefilter('ignore')
        book = epub.read_epub(file_path)
    title = book.get_metadata('DC', 'title')[0][0]
    creator = book.get_metadata('DC', 'creator')[0][0]
    intro = f'{title} by {creator}'
    print(intro)
    print('Found Chapters:', [c.get_name() for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT])
    if pick_manually:
        chapters = pick_chapters(book)
    else:
        chapters = find_chapters(book)
    print('Selected chapters:', [c.get_name() for c in chapters])
    texts = extract_texts(chapters)
    has_ffmpeg = shutil.which('ffmpeg') is not None
    if not has_ffmpeg:
        print('\033[91m' + 'ffmpeg not found. Please install ffmpeg to create mp3 and m4b audiobook files.' + '\033[0m')
    total_chars = sum(len(t) for t in texts)
    print('Started at:', time.strftime('%H:%M:%S'))
    print(f'Total characters: {total_chars:,}')
    print('Total words:', len(' '.join(texts).split(' ')))

    chapter_number = 1
    chapter_wav_files = []
    durations = {}
    # text_index tracks the position in `texts`; chapter_number only counts
    # non-empty chapters, so the two diverge whenever an empty text is skipped.
    for text_index, text in enumerate(texts):
        if not text:
            continue
        chapter_filename = f'{stem}_chapter_{chapter_number}.wav'
        chapter_wav_files.append(chapter_filename)
        if Path(chapter_filename).exists():
            print(f'File for chapter {chapter_number} already exists. Skipping')
            chapter_number += 1
            continue
        print(f'Reading chapter {chapter_number} ({len(text):,} characters)...')
        if chapter_number == 1:
            # The first chapter is prefixed with the spoken title/author line.
            text = intro + '.\n\n' + text
        start_time = time.time()
        samples, sample_rate = kokoro.create(text, voice=voice, speed=1.0, lang=lang)
        sf.write(chapter_filename, samples, sample_rate)
        durations[chapter_filename] = len(samples) / sample_rate
        # Clamp to avoid a ZeroDivisionError on a coarse clock.
        delta_seconds = max(time.time() - start_time, 1e-9)
        chars_per_sec = len(text) / delta_seconds
        # Only the texts AFTER the one just finished are still outstanding.
        remaining_chars = sum(len(t) for t in texts[text_index + 1:])
        remaining_time = remaining_chars / chars_per_sec
        print(f'Estimated time remaining: {strfdelta(remaining_time)}')
        print('Chapter written to', chapter_filename)
        print(f'Chapter {chapter_number} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)')
        progress = int((total_chars - remaining_chars) / total_chars * 100)
        print('Progress:', f'{progress}%')
        chapter_number += 1
    if has_ffmpeg:
        create_index_file(title, creator, chapter_wav_files, durations)
        create_m4b(chapter_wav_files, filename)
2025-01-14 15:35:10 +01:00
def extract_texts(chapters):
    """Return one plain-text string per chapter.

    Each chapter's body is parsed with BeautifulSoup (lxml parser); the
    stripped text of every title/p/h1-h4 element that is not empty is kept,
    each followed by a newline. A chapter without any such text yields ''.
    """
    content_tags = ['title', 'p', 'h1', 'h2', 'h3', 'h4']
    texts = []
    for chapter in chapters:
        soup = BeautifulSoup(chapter.get_body_content(), features='lxml')
        pieces = (
            node.text.strip() if node.text else ""
            for node in soup.find_all(content_tags)
        )
        texts.append(''.join(piece + '\n' for piece in pieces if piece))
    return texts
2025-01-15 09:31:50 +01:00
def is_chapter(c):
    """Tell whether an item's name looks like a chapter file.

    The lower-cased result of ``c.get_name()`` matches when it contains
    ``part`` or ``ch`` immediately followed by a digit, or the word
    ``chapter`` anywhere.

    Returns:
        bool: always True or False (never None).
    """
    name = c.get_name().lower()
    if 'chapter' in name:
        return True
    # One pattern covers both the "partNNN" and the "chNNN" naming schemes.
    return re.search(r"(?:part|ch)\d{1,3}", name) is not None
2025-01-15 19:12:48 +01:00
def find_chapters(book, verbose=False):
    """Select the document items of `book` that look like chapters.

    Document items are those whose type is ebooklib.ITEM_DOCUMENT; among them
    the ones accepted by is_chapter() are returned. When none qualifies, a
    notice is printed and all document items are returned instead. With
    `verbose`, the name and body length of every document item is printed.
    """
    def documents():
        return [item for item in book.get_items() if item.get_type() == ebooklib.ITEM_DOCUMENT]

    chapters = [doc for doc in documents() if is_chapter(doc)]
    if verbose:
        for doc in documents():
            print(f"'{doc.get_name()}'" + ', #' + str(len(doc.get_body_content())))
    if not chapters:
        print('Not easy to find the chapters, defaulting to all available documents.')
        chapters = documents()
    return chapters
2025-01-15 19:12:48 +01:00
def pick_chapters(book):
    """Let the user choose interactively which chapters to read.

    Only document items (ebooklib.ITEM_DOCUMENT) are offered, matching what
    find_chapters() and the "Found Chapters" listing consider; the selected
    items are later handed to extract_texts(), which calls
    get_body_content() on each of them.

    Returns:
        list: the selected document items, in book order.
    """
    documents = [item for item in book.get_items() if item.get_type() == ebooklib.ITEM_DOCUMENT]
    title = 'Select which chapters to read in the audiobook'
    selection = pick([doc.get_name() for doc in documents], title, multiselect=True, min_selection_count=1)
    # Each entry of a multiselect result is indexed like the original code
    # did: element 0 is the chosen option (here, the item name).
    selected_names = {entry[0] for entry in selection}
    return [doc for doc in documents if doc.get_name() in selected_names]
2025-01-14 15:35:10 +01:00
def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
    """Format a duration given in seconds according to `fmt`.

    `fmt` is a str.format template that may use the fields W (weeks),
    D (days), H (hours), M (minutes) and S (seconds). Only the fields that
    appear in `fmt` are computed, from the largest unit to the smallest, each
    one consuming its share of the remaining seconds. `tdelta` is truncated
    to an int first.
    """
    formatter = Formatter()
    requested = {field_name for _, field_name, _, _ in formatter.parse(fmt)}
    unit_seconds = (('W', 604800), ('D', 86400), ('H', 3600), ('M', 60), ('S', 1))
    seconds_left = int(tdelta)
    parts = {}
    for unit, size in unit_seconds:
        if unit in requested:
            parts[unit], seconds_left = divmod(seconds_left, size)
    return formatter.format(fmt, **parts)
2025-01-16 07:52:13 +01:00
def create_m4b(chapter_files, filename):
    """Merge the chapter WAV files into a single M4B audiobook.

    The chapters are concatenated and exported as AAC into a temporary
    ``<stem>.tmp.m4a`` (skipped if that file already exists); ffmpeg then
    copies the stream into ``<stem>.m4b`` while taking the metadata from
    ``chapters.txt`` in the current directory. The temporary file is removed
    afterwards.

    Args:
        chapter_files: paths of the chapter WAV files, in reading order.
        filename: name of the source book; only its stem is used to build
            the output names.
    """
    # Build the names from the stem rather than by replacing '.epub': with a
    # name that lacks that exact substring the replacement is a no-op, and
    # the temp/output paths would alias the input book, which the unlink()
    # below would then delete.
    stem = Path(filename).stem
    tmp_filename = f'{stem}.tmp.m4a'
    final_filename = f'{stem}.m4b'
    if not Path(tmp_filename).exists():
        combined_audio = AudioSegment.empty()
        for wav_file in chapter_files:
            combined_audio += AudioSegment.from_wav(wav_file)
        print('Converting to Mp4...')
        combined_audio.export(tmp_filename, format="mp4", codec="aac", bitrate="64k")
    print('Creating M4B file...')
    proc = subprocess.run([
        'ffmpeg', '-i', tmp_filename, '-i', 'chapters.txt',
        '-map', '0', '-map_metadata', '1', '-c', 'copy', '-f', 'mp4', final_filename,
    ])
    Path(tmp_filename).unlink()
    if proc.returncode == 0:
        print(f'{final_filename} created. Enjoy your audiobook.')
        print('Feel free to delete the intermediary .wav chapter files, the .m4b is all you need.')
    else:
        # Do not fail silently: tell the user the final step went wrong.
        print(f'ffmpeg failed with exit code {proc.returncode} while creating {final_filename}.')
2025-01-14 15:35:10 +01:00
2025-01-16 07:52:13 +01:00
def probe_duration(file_name):
    """Return the duration reported by ffprobe for `file_name`, as a float.

    ffprobe is asked to print only the container's ``duration`` entry; the
    call uses check=True, so a non-zero exit raises
    subprocess.CalledProcessError.
    """
    command = ['ffprobe', '-i', file_name]
    command += ['-show_entries', 'format=duration']
    command += ['-v', 'quiet']
    command += ['-of', 'default=noprint_wrappers=1:nokey=1']
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    return float(completed.stdout.strip())
def _escape_ffmetadata(value):
    """Backslash-escape the characters that are special in an FFMETADATA value."""
    escaped = str(value).replace('\\', '\\\\')
    for special in ('=', ';', '#', '\n'):
        escaped = escaped.replace(special, '\\' + special)
    return escaped


def create_index_file(title, creator, chapter_mp3_files, durations):
    """Write ``chapters.txt``, the FFMETADATA file consumed by create_m4b().

    The file holds the book title and artist followed by one [CHAPTER]
    section per audio file, with millisecond START/END offsets accumulated
    from the chapter durations. Chapters are titled "Chapter 1", "Chapter 2",
    ... to match the 1-based numbering of the chapter files.

    Args:
        title: book title written as the ``title`` tag.
        creator: author written as the ``artist`` tag.
        chapter_mp3_files: chapter audio file paths, in reading order.
        durations: mapping of file path to duration in seconds; files that
            are missing from it are measured with probe_duration().
    """
    # Written as UTF-8 explicitly so non-ASCII titles do not depend on the
    # platform's default encoding.
    with open("chapters.txt", "w", encoding="utf-8") as f:
        f.write(f";FFMETADATA1\ntitle={_escape_ffmetadata(title)}\nartist={_escape_ffmetadata(creator)}\n\n")
        start = 0
        for number, chapter_file in enumerate(chapter_mp3_files, start=1):
            seconds = durations.get(chapter_file)
            if seconds is None:
                seconds = probe_duration(chapter_file)
            end = start + int(seconds * 1000)
            f.write(f"[CHAPTER]\nTIMEBASE=1/1000\nSTART={start}\nEND={end}\ntitle=Chapter {number}\n\n")
            start = end
2025-01-14 15:35:10 +01:00
2025-01-15 00:06:05 +01:00
def cli_main():
    """Command-line entry point: check model files, parse arguments, run main().

    Exits with status 1 when the model or voices file is missing from the
    current directory, or when the program is started without arguments (the
    help text is then printed to stderr). With --cuda the Kokoro instance is
    rebuilt on top of an onnxruntime session using the CUDA provider.
    """
    model_file = 'kokoro-v0_19.onnx'
    voices_file = 'voices.json'
    cuda_provider = "CUDAExecutionProvider"

    if not (Path(model_file).exists() and Path(voices_file).exists()):
        for line in (
            'Error: kokoro-v0_19.onnx and voices.json must be in the current directory. Please download them with:',
            'wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx',
            'wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.json',
        ):
            print(line)
        sys.exit(1)

    # The voice list is needed for the help text and the default voice, so
    # the model is loaded before the arguments are parsed.
    kokoro = Kokoro(model_file, voices_file)
    voices = list(kokoro.get_voices())
    voices_str = ', '.join(voices)
    default_voice = 'af_sky' if 'af_sky' in voices else voices[0]

    parser = argparse.ArgumentParser(
        epilog='example:\n  audiblez book.epub -l en-us -v af_sky',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('epub_file_path', help='Path to the epub file')
    parser.add_argument('-l', '--lang', default='en-gb',
                        help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
    parser.add_argument('-v', '--voice', default=default_voice,
                        help=f'Choose narrating voice: {voices_str}')
    parser.add_argument('-c', '--cuda', default=False, action='store_true',
                        help='Use cuda acceleration. Requires onnxruntime-gpu nvidia-cudnn-cu12. Set LD_LIBRARY_PATH=$venv/lib/python3.12/site-packages/nvidia/cudnn/lib/')
    parser.add_argument('-p', '--pick', default=False, action='store_true',
                        help='Manually select which chapters to read in the audiobook')

    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()

    if args.cuda:
        from onnxruntime import InferenceSession
        session = InferenceSession(model_file, providers=[cuda_provider])
        kokoro = Kokoro.from_session(session, voices_file)
    main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick)
2025-01-15 00:06:05 +01:00
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    cli_main()