Merge branch 'main' into fix_exception_when_skipping_chapters

This commit is contained in:
Claudio Santini 2025-01-23 21:35:16 +01:00 committed by GitHub
commit 7745fcdf35
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 137 additions and 32 deletions

1
.gitignore vendored
View file

@ -8,3 +8,4 @@ epub
*.json *.json
*.onnx *.onnx
dist dist
.venv

View file

@ -33,14 +33,13 @@ audiblez book.epub -l en-gb -v af_sky
It will first create a bunch of `book_chapter_1.wav`, `book_chapter_2.wav`, etc. files in the same directory, It will first create a bunch of `book_chapter_1.wav`, `book_chapter_2.wav`, etc. files in the same directory,
and at the end it will produce a `book.m4b` file with the whole book you can listen with VLC or any and at the end it will produce a `book.m4b` file with the whole book you can listen with VLC or any
audiobook player. audiobook player.
It will only produce the `.m4b` file if you have `ffmpeg` installed on your machine. It will only produce the `.m4b` file if you have `ffmpeg` installed on your machine.
## Supported Languages ## Supported Languages
Use `-l` option to specify the language, available language codes are: Use `-l` option to specify the language, available language codes are:
🇺🇸 `en-us`, 🇬🇧 `en-gb`, 🇫🇷 `fr-fr`, 🇯🇵 `ja`, 🇰🇷 `kr` and 🇨🇳 `cmn`. 🇺🇸 `en-us`, 🇬🇧 `en-gb`, 🇫🇷 `fr-fr`, 🇯🇵 `ja`, 🇰🇷 `kr` and 🇨🇳 `cmn`.
## Speed ## Speed
By default the audio is generated using a normal speed, but you can make it up to twice slower or faster by specifying a speed argument between 0.5 to 2.0: By default the audio is generated using a normal speed, but you can make it up to twice slower or faster by specifying a speed argument between 0.5 to 2.0:
@ -53,6 +52,40 @@ Use `-v` option to specify the voice:
available voices are `af`, `af_bella`, `af_nicole`, `af_sarah`, `af_sky`, `am_adam`, `am_michael`, `bf_emma`, `bf_isabella`, `bm_george`, `bm_lewis`. available voices are `af`, `af_bella`, `af_nicole`, `af_sarah`, `af_sky`, `am_adam`, `am_michael`, `bf_emma`, `bf_isabella`, `bm_george`, `bm_lewis`.
You can try them here: [https://huggingface.co/spaces/hexgrad/Kokoro-TTS](https://huggingface.co/spaces/hexgrad/Kokoro-TTS) You can try them here: [https://huggingface.co/spaces/hexgrad/Kokoro-TTS](https://huggingface.co/spaces/hexgrad/Kokoro-TTS)
## How to run on GPU
By default audiblez runs on CPU, and only the CPU-enabled ONNX Runtime is installed. If you want to use a GPU for faster performance, manually install the GPU-enabled ONNX Runtime and specify a runtime provider with the `--providers` flag.
```bash
pip install onnxruntime-gpu
```
To specify ONNX providers, for example to run on an NVIDIA GPU, use the `--providers` flag:
```bash
audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider
```
To see the list of available providers on your system, run the following:
```bash
audiblez --help
```
or
```bash
python -c "import onnxruntime as ort; print(ort.get_available_providers())"
```
This will display the ONNX providers that can be used, such as `CUDAExecutionProvider` for NVIDIA GPUs or `CPUExecutionProvider` for CPU-only execution.
You can specify a provider hierarchy by listing multiple providers separated by spaces, in order of preference:
```bash
audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider CPUExecutionProvider
```
## Author ## Author
by [Claudio Santini](https://claudio.uk) in 2025, distributed under MIT licence. by [Claudio Santini](https://claudio.uk) in 2025, distributed under MIT licence.

View file

@ -15,18 +15,39 @@ import re
from pathlib import Path from pathlib import Path
from string import Formatter from string import Formatter
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from kokoro_onnx import config
from kokoro_onnx import Kokoro from kokoro_onnx import Kokoro
from ebooklib import epub from ebooklib import epub
from pydub import AudioSegment from pydub import AudioSegment
from pick import pick from pick import pick
import onnxruntime as ort
from tempfile import NamedTemporaryFile
config.MAX_PHONEME_LENGTH = 128
def main(kokoro, file_path, lang, voice, pick_manually, speed): def main(kokoro, file_path, lang, voice, pick_manually, speed, providers):
# Set ONNX providers if specified
if providers:
available_providers = ort.get_available_providers()
invalid_providers = [p for p in providers if p not in available_providers]
if invalid_providers:
print(f"Invalid ONNX providers: {', '.join(invalid_providers)}")
print(f"Available providers: {', '.join(available_providers)}")
sys.exit(1)
kokoro.sess.set_providers(providers)
print(f"Using ONNX providers: {', '.join(providers)}")
filename = Path(file_path).name filename = Path(file_path).name
with warnings.catch_warnings(): with warnings.catch_warnings():
book = epub.read_epub(file_path) book = epub.read_epub(file_path)
title = book.get_metadata('DC', 'title')[0][0] title = book.get_metadata('DC', 'title')[0][0]
creator = book.get_metadata('DC', 'creator')[0][0] creator = book.get_metadata('DC', 'creator')[0][0]
cover_maybe = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_COVER]
cover_image = cover_maybe[0].get_content() if cover_maybe else b""
if cover_maybe:
print(f'Found cover image {cover_maybe[0].file_name} in {cover_maybe[0].media_type} format')
intro = f'{title} by {creator}' intro = f'{title} by {creator}'
print(intro) print(intro)
print('Found Chapters:', [c.get_name() for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT]) print('Found Chapters:', [c.get_name() for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT])
@ -36,18 +57,22 @@ def main(kokoro, file_path, lang, voice, pick_manually, speed):
chapters = find_chapters(book) chapters = find_chapters(book)
print('Selected chapters:', [c.get_name() for c in chapters]) print('Selected chapters:', [c.get_name() for c in chapters])
texts = extract_texts(chapters) texts = extract_texts(chapters)
has_ffmpeg = shutil.which('ffmpeg') is not None has_ffmpeg = shutil.which('ffmpeg') is not None
if not has_ffmpeg: if not has_ffmpeg:
print('\033[91m' + 'ffmpeg not found. Please install ffmpeg to create mp3 and m4b audiobook files.' + '\033[0m') print('\033[91m' + 'ffmpeg not found. Please install ffmpeg to create mp3 and m4b audiobook files.' + '\033[0m')
total_chars = sum([len(t) for t in texts])
total_chars, processed_chars = sum(map(len, texts)), 0
print('Started at:', time.strftime('%H:%M:%S')) print('Started at:', time.strftime('%H:%M:%S'))
print(f'Total characters: {total_chars:,}') print(f'Total characters: {total_chars:,}')
print('Total words:', len(' '.join(texts).split(' '))) print('Total words:', len(' '.join(texts).split()))
i = 1
chapter_mp3_files = [] chapter_mp3_files = []
for text in texts: durations = {}
if len(text) == 0:
for i, text in enumerate(texts, start=1):
if len(text.strip()) < 10:
print(f'Skipping empty chapter {i}')
continue continue
chapter_filename = filename.replace('.epub', f'_chapter_{i}.wav') chapter_filename = filename.replace('.epub', f'_chapter_{i}.wav')
chapter_mp3_files.append(chapter_filename) chapter_mp3_files.append(chapter_filename)
@ -60,25 +85,30 @@ def main(kokoro, file_path, lang, voice, pick_manually, speed):
i += 1 i += 1
chapter_mp3_files.remove(chapter_filename) chapter_mp3_files.remove(chapter_filename)
continue continue
print(f'Reading chapter {i} ({len(text):,} characters)...') print(f'Reading chapter {i} ({len(text):,} characters)...')
if i == 1: if i == 1:
text = intro + '.\n\n' + text text = intro + '.\n\n' + text
start_time = time.time() start_time = time.time()
samples, sample_rate = kokoro.create(text, voice=voice, speed=speed, lang=lang) samples, sample_rate = kokoro.create(text, voice=voice, speed=speed, lang=lang)
sf.write(f'{chapter_filename}', samples, sample_rate) sf.write(f'{chapter_filename}', samples, sample_rate)
durations[chapter_filename] = len(samples)/sample_rate
end_time = time.time() end_time = time.time()
delta_seconds = end_time - start_time delta_seconds = end_time - start_time
chars_per_sec = len(text) / delta_seconds chars_per_sec = len(text) / delta_seconds
remaining_chars = sum([len(t) for t in texts[i - 1:]]) processed_chars += len(text)
remaining_chars = total_chars - processed_chars
remaining_time = remaining_chars / chars_per_sec remaining_time = remaining_chars / chars_per_sec
print(f'Estimated time remaining: {strfdelta(remaining_time)}') print(f'Estimated time remaining: {strfdelta(remaining_time)}')
print('Chapter written to', chapter_filename) print('Chapter written to', chapter_filename)
print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)') print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)')
progress = int((total_chars - remaining_chars) / total_chars * 100) progress = processed_chars * 100 // total_chars
print('Progress:', f'{progress}%') print('Progress:', f'{progress}%')
i += 1
if has_ffmpeg: if has_ffmpeg:
create_m4b(chapter_mp3_files, filename, title, creator) create_index_file(title, creator, chapter_mp3_files, durations)
create_m4b(chapter_mp3_files, filename, title, creator, cover_image)
def extract_texts(chapters): def extract_texts(chapters):
@ -98,17 +128,12 @@ def extract_texts(chapters):
def is_chapter(c): def is_chapter(c):
name = c.get_name().lower() name = c.get_name().lower()
part = r"part\d{1,3}" return bool(
if re.search(part, name): 'chapter' in name.lower()
return True or re.search(r'part\d{1,3}', name)
ch = r"ch\d{1,3}" or re.search(r'ch\d{1,3}', name)
if re.search(ch, name): or re.search(r'chap\d{1,3}', name)
return True )
chap = r"chap\d{1,3}"
if re.search(chap, name):
return True
if 'chapter' in name:
return True
def find_chapters(book, verbose=False): def find_chapters(book, verbose=False):
@ -146,8 +171,8 @@ def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
return f.format(fmt, **values) return f.format(fmt, **values)
def create_m4b(chapter_files, filename, title, author): def create_m4b(chapter_files, filename, title, author, cover_image):
tmp_filename = filename.replace('.epub', '.tmp.m4a') tmp_filename = filename.replace('.epub', '.tmp.mp4')
if not Path(tmp_filename).exists(): if not Path(tmp_filename).exists():
combined_audio = AudioSegment.empty() combined_audio = AudioSegment.empty()
for wav_file in chapter_files: for wav_file in chapter_files:
@ -157,10 +182,28 @@ def create_m4b(chapter_files, filename, title, author):
combined_audio.export(tmp_filename, format="mp4", codec="aac", bitrate="64k") combined_audio.export(tmp_filename, format="mp4", codec="aac", bitrate="64k")
final_filename = filename.replace('.epub', '.m4b') final_filename = filename.replace('.epub', '.m4b')
print('Creating M4B file...') print('Creating M4B file...')
if cover_image:
cover_image_file = NamedTemporaryFile("wb")
cover_image_file.write(cover_image)
cover_image_args = ["-i", cover_image_file.name, "-map", "0:a", "-map", "1:v"]
else:
cover_image_args = []
proc = subprocess.run([ proc = subprocess.run([
'ffmpeg', '-i', f'{tmp_filename}', '-c', 'copy', '-f', 'mp4', 'ffmpeg',
'-metadata', f'title={title}', '-i', f'{tmp_filename}',
'-metadata', f'author={author}', '-i', 'chapters.txt',
#'-map', '0',
#'-map_metadata', '1',
*cover_image_args,
'-c:a', 'copy',
'-c:v', 'copy',
'-disposition:v', 'attached_pic',
'-metadata:s:v', f'title={title}',
'-metadata', f'artist={author}',
'-c', 'copy',
'-f', 'mp4',
f'{final_filename}' f'{final_filename}'
]) ])
Path(tmp_filename).unlink() Path(tmp_filename).unlink()
@ -168,19 +211,45 @@ def create_m4b(chapter_files, filename, title, author):
print(f'{final_filename} created. Enjoy your audiobook.') print(f'{final_filename} created. Enjoy your audiobook.')
print('Feel free to delete the intermediary .wav chapter files, the .m4b is all you need.') print('Feel free to delete the intermediary .wav chapter files, the .m4b is all you need.')
def probe_duration(file_name):
    """Return the duration of a media file in seconds, as reported by ffprobe.

    Runs ``ffprobe`` asking only for the ``format=duration`` entry, printed
    as a bare value, and parses the captured standard output as a float.

    Args:
        file_name: Path of the media file to inspect.

    Returns:
        The duration parsed from ffprobe's output, as a float.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
        ValueError: If ffprobe's output cannot be parsed as a float.
    """
    command = [
        'ffprobe',
        '-i', file_name,
        '-show_entries', 'format=duration',
        '-v', 'quiet',
        '-of', 'default=noprint_wrappers=1:nokey=1',
    ]
    result = subprocess.run(command, capture_output=True, text=True, check=True)
    raw_duration = result.stdout.strip()
    return float(raw_duration)
def create_index_file(title, creator, chapter_mp3_files, durations):
    """Write ``chapters.txt``, an FFMETADATA1 file listing the book's chapters.

    The file is created in the current working directory. It starts with the
    global ``title`` and ``artist`` tags, followed by one ``[CHAPTER]``
    section per entry of ``chapter_mp3_files``. Chapters are laid out back to
    back on a millisecond timebase: each one starts where the previous ended.

    Args:
        title: Book title, written as the global ``title`` tag.
        creator: Book author, written as the global ``artist`` tag.
        chapter_mp3_files: Chapter audio file names, in playback order.
        durations: Mapping of chapter file name to duration in seconds.
            Entries missing from the mapping are measured with
            ``probe_duration`` and stored back into this dict.
    """
    # The FFMETADATA format requires these characters to be backslash-escaped
    # inside keys and values; unescaped, a title such as "A=B" or "Part 1; 2"
    # would corrupt the file. translate() is single-pass, so the backslashes
    # it inserts are not escaped a second time.
    escape_table = str.maketrans({
        '=': '\\=',
        ';': '\\;',
        '#': '\\#',
        '\\': '\\\\',
        '\n': '\\\n',
    })

    def escape(value):
        return str(value).translate(escape_table)

    # Explicit UTF-8 so non-ASCII titles/authors do not depend on the
    # platform's default encoding.
    with open("chapters.txt", "w", encoding="utf-8") as f:
        f.write(f";FFMETADATA1\ntitle={escape(title)}\nartist={escape(creator)}\n\n")
        start = 0
        # NOTE(review): chapter titles are numbered from 0, while the chapter
        # file names produced elsewhere are numbered from 1 -- kept as-is to
        # preserve existing output; confirm whether this is intended.
        for i, c in enumerate(chapter_mp3_files):
            if c not in durations:
                durations[c] = probe_duration(c)
            end = start + int(durations[c] * 1000)
            f.write(f"[CHAPTER]\nTIMEBASE=1/1000\nSTART={start}\nEND={end}\ntitle=Chapter {i}\n\n")
            start = end
def cli_main(): def cli_main():
if not Path('kokoro-v0_19.onnx').exists() or not Path('voices.json').exists(): MODEL_NAME = 'kokoro-v0_19.onnx'
CUDA_PROVIDER = "CUDAExecutionProvider"
VOICES = 'voices.json'
if not Path(MODEL_NAME).exists() or not Path(VOICES).exists():
print('Error: kokoro-v0_19.onnx and voices.json must be in the current directory. Please download them with:') print('Error: kokoro-v0_19.onnx and voices.json must be in the current directory. Please download them with:')
print('wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx') print('wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx')
print('wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.json') print('wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.json')
sys.exit(1) sys.exit(1)
kokoro = Kokoro('kokoro-v0_19.onnx', 'voices.json') kokoro = Kokoro(MODEL_NAME, VOICES)
voices = list(kokoro.get_voices()) voices = list(kokoro.get_voices())
voices_str = ', '.join(voices) voices_str = ', '.join(voices)
epilog = 'example:\n' + \ epilog = 'example:\n' + \
' audiblez book.epub -l en-us -v af_sky' ' audiblez book.epub -l en-us -v af_sky'
default_voice = 'af_sky' if 'af_sky' in voices else voices[0] default_voice = 'af_sky' if 'af_sky' in voices else voices[0]
# Get available ONNX providers
available_providers = ort.get_available_providers()
providers_help = f"Available ONNX providers: {', '.join(available_providers)}"
parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter) parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('epub_file_path', help='Path to the epub file') parser.add_argument('epub_file_path', help='Path to the epub file')
parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn') parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
@ -188,11 +257,13 @@ def cli_main():
parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook', parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook',
action='store_true') action='store_true')
parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float) parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float)
parser.add_argument('--providers', nargs='+', metavar='PROVIDER', help=f"Specify ONNX providers. {providers_help}")
if len(sys.argv) == 1: if len(sys.argv) == 1:
parser.print_help(sys.stderr) parser.print_help(sys.stderr)
sys.exit(1) sys.exit(1)
args = parser.parse_args() args = parser.parse_args()
main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed) main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed, args.providers)
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -1,6 +1,6 @@
[project] [project]
name = "audiblez" name = "audiblez"
version = "0.1.12" version = "0.2.0"
description = "Generate audiobooks from e-books (epub to wav/m4b)" description = "Generate audiobooks from e-books (epub to wav/m4b)"
authors = [ authors = [
{ name = "Claudio Santini", email = "hireclaudio@gmail.com" } { name = "Claudio Santini", email = "hireclaudio@gmail.com" }