mirror of
https://github.com/santinic/audiblez.git
synced 2025-09-18 21:40:39 +00:00
init
This commit is contained in:
parent
b2e87192e9
commit
b78dc88b8a
3 changed files with 186 additions and 1 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,7 +1,6 @@
|
|||
venv
|
||||
*.wav
|
||||
*.mp3
|
||||
*.txt
|
||||
gene
|
||||
epub
|
||||
*.sh
|
||||
|
|
120
audiblez.py
Normal file
120
audiblez.py
Normal file
|
@ -0,0 +1,120 @@
|
|||
# audiblez - A program to convert e-books into audiobooks using the high-quality
|
||||
# Kokoro-82M text-to-speech model.
|
||||
# Distributed under the MIT License for educational purposes.
|
||||
# by Claudio Santini (2025) - https://claudio.uk
|
||||
|
||||
import argparse
|
||||
import time
|
||||
import shutil
|
||||
import subprocess
|
||||
import soundfile as sf
|
||||
import ebooklib
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from string import Formatter
|
||||
from bs4 import BeautifulSoup
|
||||
from kokoro_onnx import Kokoro
|
||||
from ebooklib import epub
|
||||
|
||||
# Module-level Kokoro instance, constructed as soon as this module is loaded and
# shared by main() (speech synthesis) and the command-line entry point (voice list).
# NOTE(review): both file names are relative, so they are presumably resolved
# against the current working directory - confirm before running from elsewhere.
kokoro = Kokoro('kokoro-v0_19.onnx', 'voices.json')
|
||||
|
||||
|
||||
def main(file_path, lang, voice, wav=None):
    """Read an epub and synthesize one audio file per detected chapter.

    Chapter files are written to the current working directory as
    ``<book>_chapter_<n>.wav``. When ffmpeg is on PATH and ``wav`` is false,
    every finished chapter is also handed to ``convert_to_mp3``. Chapters whose
    .wav or .mp3 file already exists are skipped, so an interrupted run can be
    resumed by simply running the command again.

    Args:
        file_path: Path to the .epub file.
        lang: Language code, passed through to ``kokoro.create``.
        voice: Voice name, passed through to ``kokoro.create``.
        wav: If true, keep the .wav files and do not convert them to .mp3.
            When left as ``None`` the value is read from the module-level
            ``args`` object set by the command-line entry point, and defaults
            to ``False`` if no such object exists (e.g. when imported).
    """
    if wav is None:
        # Backward compatibility: older callers relied on main() reading the
        # global ``args``; do so only if it is actually defined.
        wav = bool(getattr(globals().get('args'), 'wav', False))

    file_name = Path(file_path).name
    with warnings.catch_warnings():
        # catch_warnings() on its own only saves/restores the filter state;
        # an explicit filter is needed to silence warnings raised while parsing.
        warnings.simplefilter('ignore')
        book = epub.read_epub(file_path)

    # Not every epub declares a title/creator; fall back instead of crashing.
    titles = book.get_metadata('DC', 'title')
    creators = book.get_metadata('DC', 'creator')
    title = titles[0][0] if titles else file_name
    creator = creators[0][0] if creators else 'Unknown'
    intro = f'{title} by {creator}'
    print(intro)

    chapters = find_chapters(book)
    print([c.get_name() for c in chapters])
    texts = extract_texts(chapters)

    has_ffmpeg = shutil.which('ffmpeg') is not None
    use_ffmpeg = has_ffmpeg and not wav

    # Use the stem rather than str.replace('.epub', ...): replace() leaves the
    # name untouched when the suffix is not literally '.epub', which would make
    # the "output" file name identical to the input file name.
    base_name = Path(file_name).stem.lower()

    for i, text in enumerate(texts, start=1):
        chapter_filename = f'{base_name}_chapter_{i}.wav'
        mp3_filename = f'{base_name}_chapter_{i}.mp3'
        if Path(chapter_filename).exists() or Path(mp3_filename).exists():
            print(f'File for chapter {i} already exists. Skipping')
            continue
        if not text.strip():
            # Nothing to read; also avoids a zero rate in the estimate below.
            print(f'Chapter {i} has no readable text. Skipping')
            continue

        print(f'Reading chapter {i} ({len(text):,} characters)...')
        start_time = time.time()
        samples, sample_rate = kokoro.create(text, voice=voice, speed=1.0, lang=lang)
        sf.write(chapter_filename, samples, sample_rate)
        delta_seconds = time.time() - start_time
        print('Chapter written to', chapter_filename)

        if delta_seconds > 0:
            characters_per_second = len(text) / delta_seconds
            print(f'Chapter {i} read in {delta_seconds:.2f} seconds '
                  f'({characters_per_second:.0f} characters per second)')
            # i is 1-based, so texts[i:] are the chapters after the current one.
            remaining_characters = sum(len(t) for t in texts[i:])
            remaining_time = remaining_characters / characters_per_second
            print(f'Estimated time remaining: {strfdelta(remaining_time)}')
        print()

        if use_ffmpeg:
            print(f'In parallel, converting chapter {i} to mp3...')
            convert_to_mp3(chapter_filename)
|
||||
|
||||
|
||||
def extract_texts(chapters):
    """Return the readable text of each chapter, one string per chapter.

    For every chapter the body markup is parsed and the stripped text of each
    title/paragraph/heading tag is collected; non-empty pieces are emitted in
    document order, each followed by a newline.
    """
    content_tags = ['title', 'p', 'h1', 'h2', 'h3', 'h4']

    def chapter_to_text(chapter):
        # Parse the chapter body and keep only the text of the content tags.
        soup = BeautifulSoup(chapter.get_body_content(), features='lxml')
        pieces = ((tag.text or "").strip() for tag in soup.find_all(content_tags))
        return ''.join(piece + '\n' for piece in pieces if piece)

    return [chapter_to_text(chapter) for chapter in chapters]
|
||||
|
||||
|
||||
def find_chapters(book, verbose=False):
    """Return the document items of ``book`` whose name looks like a chapter.

    An item qualifies when its type is ``ebooklib.ITEM_DOCUMENT`` and its name
    contains 'chapter' or 'part' (case-insensitive). With ``verbose`` set, a
    tuple (name, body length, 'YES' or '-') is printed for every document item,
    showing whether it was selected.
    """
    def looks_like_chapter(item):
        # Name-based heuristic: match on the lowercased item name.
        lowered = item.get_name().lower()
        return 'chapter' in lowered or 'part' in lowered

    selected = []
    for item in book.get_items():
        if item.get_type() == ebooklib.ITEM_DOCUMENT and looks_like_chapter(item):
            selected.append(item)

    if verbose:
        for item in book.get_items():
            if item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue
            name = item.get_name()
            size = len(item.get_body_content())
            print((name, size, 'YES' if item in selected else '-'))

    return selected
|
||||
|
||||
|
||||
def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
    """Format a duration given in seconds according to ``fmt``.

    ``fmt`` is a ``str.format``-style template that may use the fields W
    (weeks), D (days), H (hours), M (minutes) and S (seconds). The duration is
    truncated to whole seconds and split, largest unit first, only across the
    fields that actually appear in the template.
    """
    seconds_left = int(tdelta)
    formatter = Formatter()
    requested = {field_name for _, field_name, _, _ in formatter.parse(fmt)}

    # (field, seconds per unit), ordered from the largest unit to the smallest.
    unit_sizes = (('W', 604800), ('D', 86400), ('H', 3600), ('M', 60), ('S', 1))

    parts = {}
    for unit, size in unit_sizes:
        if unit not in requested:
            continue
        parts[unit], seconds_left = divmod(seconds_left, size)

    return formatter.format(fmt, **parts)
|
||||
|
||||
|
||||
def convert_to_mp3(wav_file):
    """Convert ``wav_file`` to .mp3 with ffmpeg in the background.

    The conversion runs in a worker thread so the caller can carry on with the
    next chapter. The .wav file is deleted only after ffmpeg exits
    successfully; on failure it is kept and a message is printed.

    Args:
        wav_file: Path of the .wav file to convert. The .mp3 is written next
            to it, with the same name and the suffix swapped.

    Returns:
        None. If ffmpeg is not on PATH a hint is printed and nothing is done.
    """
    if shutil.which('ffmpeg') is None:
        print('ffmpeg not found. Please install ffmpeg to convert .wav files to .mp3')
        return

    # Local import: threading is only needed here and the file's top-level
    # imports do not include it.
    import threading

    # Swap only the suffix; str.replace would rewrite every '.wav' in the path.
    mp3_file = str(Path(wav_file).with_suffix('.mp3'))
    print(f'Converting {wav_file} to {mp3_file}...')

    def _convert_then_cleanup():
        # The command is an argument list executed without a shell, so shell
        # syntax such as '&& rm' cannot be used; the cleanup is done in Python.
        # Output goes to DEVNULL because an unread PIPE can fill up and block
        # ffmpeg; stdin is detached and -y given so ffmpeg never waits on a prompt.
        result = subprocess.run(
            ['ffmpeg', '-y', '-i', wav_file, mp3_file],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if result.returncode == 0:
            Path(wav_file).unlink(missing_ok=True)
        else:
            print(f'ffmpeg failed to convert {wav_file} (exit code {result.returncode}); keeping the .wav file')

    threading.Thread(target=_convert_then_cleanup).start()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: describe the options, parse them, run main().
    available_voices = list(kokoro.get_voices())

    cli = argparse.ArgumentParser(
        epilog=('example:\n'
                ' audiblez book.epub -l en-us -v af_sky'),
        # Raw formatter keeps the line break inside the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('epub_file_path', help='Path to the epub file')
    cli.add_argument(
        '-l', '--lang',
        default='en-gb',
        help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn',
    )
    cli.add_argument(
        '-v', '--voice',
        default=available_voices[0],
        help=f'Choose narrating voice: {", ".join(available_voices)}',
    )
    cli.add_argument(
        '-w', '--wav',
        action='store_true',
        help="Don't convert to .mp3, just create .wav files",
    )

    # Keep this bound to the module-level name ``args``: main() looks the
    # --wav flag up on that global.
    args = cli.parse_args()
    main(args.epub_file_path, args.lang, args.voice)
|
66
requirements.txt
Normal file
66
requirements.txt
Normal file
|
@ -0,0 +1,66 @@
|
|||
attrs==24.3.0
|
||||
audioread==3.0.1
|
||||
babel==2.16.0
|
||||
beautifulsoup4==4.12.3
|
||||
bibtexparser==2.0.0b8
|
||||
bs4==0.0.2
|
||||
certifi==2024.12.14
|
||||
cffi==1.17.1
|
||||
charset-normalizer==3.4.1
|
||||
clldutils==3.24.0
|
||||
colorama==0.4.6
|
||||
coloredlogs==15.0.1
|
||||
colorlog==6.9.0
|
||||
csvw==3.5.1
|
||||
decorator==5.1.1
|
||||
dlinfo==1.2.1
|
||||
EbookLib==0.18
|
||||
espeakng-loader==0.2.1
|
||||
flatbuffers==24.12.23
|
||||
humanfriendly==10.0
|
||||
idna==3.10
|
||||
isodate==0.7.2
|
||||
joblib==1.4.2
|
||||
jsonschema==4.23.0
|
||||
jsonschema-specifications==2024.10.1
|
||||
kokoro-onnx==0.2.6
|
||||
language-tags==1.2.0
|
||||
lazy_loader==0.4
|
||||
librosa==0.10.2.post1
|
||||
llvmlite==0.43.0
|
||||
lxml==5.3.0
|
||||
Markdown==3.7
|
||||
MarkupSafe==3.0.2
|
||||
mpmath==1.3.0
|
||||
msgpack==1.1.0
|
||||
numba==0.60.0
|
||||
numpy==2.0.2
|
||||
onnxruntime==1.20.1
|
||||
packaging==24.2
|
||||
phonemizer-fork==3.3.1
|
||||
platformdirs==4.3.6
|
||||
pooch==1.8.2
|
||||
protobuf==5.29.3
|
||||
pycparser==2.22
|
||||
pylatexenc==2.10
|
||||
pyparsing==3.2.1
|
||||
python-dateutil==2.9.0.post0
|
||||
rdflib==7.1.2
|
||||
referencing==0.35.1
|
||||
regex==2024.11.6
|
||||
requests==2.32.3
|
||||
rfc3986==1.5.0
|
||||
rpds-py==0.22.3
|
||||
scikit-learn==1.6.1
|
||||
scipy==1.15.1
|
||||
segments==2.2.1
|
||||
six==1.17.0
|
||||
soundfile==0.13.0
|
||||
soupsieve==2.6
|
||||
soxr==0.5.0.post1
|
||||
sympy==1.13.3
|
||||
tabulate==0.9.0
|
||||
threadpoolctl==3.5.0
|
||||
typing_extensions==4.12.2
|
||||
uritemplate==4.1.1
|
||||
urllib3==2.3.0
|
Loading…
Add table
Reference in a new issue