mirror of
https://github.com/santinic/audiblez.git
synced 2025-08-05 16:48:55 +00:00
Merge branch 'main' into fix_exception_when_skipping_chapters
This commit is contained in:
commit
7745fcdf35
4 changed files with 137 additions and 32 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -8,3 +8,4 @@ epub
|
||||||
*.json
|
*.json
|
||||||
*.onnx
|
*.onnx
|
||||||
dist
|
dist
|
||||||
|
.venv
|
||||||
|
|
37
README.md
37
README.md
|
@ -33,14 +33,13 @@ audiblez book.epub -l en-gb -v af_sky
|
||||||
|
|
||||||
It will first create a bunch of `book_chapter_1.wav`, `book_chapter_2.wav`, etc. files in the same directory,
|
It will first create a bunch of `book_chapter_1.wav`, `book_chapter_2.wav`, etc. files in the same directory,
|
||||||
and at the end it will produce a `book.m4b` file with the whole book, which you can listen to with VLC or any
|
and at the end it will produce a `book.m4b` file with the whole book, which you can listen to with VLC or any
|
||||||
audiobook player.
|
audiobook player.
|
||||||
It will only produce the `.m4b` file if you have `ffmpeg` installed on your machine.
|
It will only produce the `.m4b` file if you have `ffmpeg` installed on your machine.
|
||||||
|
|
||||||
## Supported Languages
|
## Supported Languages
|
||||||
Use `-l` option to specify the language, available language codes are:
|
Use `-l` option to specify the language, available language codes are:
|
||||||
🇺🇸 `en-us`, 🇬🇧 `en-gb`, 🇫🇷 `fr-fr`, 🇯🇵 `ja`, 🇰🇷 `ko` and 🇨🇳 `cmn`.
|
🇺🇸 `en-us`, 🇬🇧 `en-gb`, 🇫🇷 `fr-fr`, 🇯🇵 `ja`, 🇰🇷 `ko` and 🇨🇳 `cmn`.
|
||||||
|
|
||||||
|
|
||||||
## Speed
|
## Speed
|
||||||
By default the audio is generated at normal speed, but you can make it up to twice as slow or twice as fast by specifying a speed argument between 0.5 and 2.0:
|
By default the audio is generated at normal speed, but you can make it up to twice as slow or twice as fast by specifying a speed argument between 0.5 and 2.0:
|
||||||
|
|
||||||
|
@ -53,6 +52,40 @@ Use `-v` option to specify the voice:
|
||||||
available voices are `af`, `af_bella`, `af_nicole`, `af_sarah`, `af_sky`, `am_adam`, `am_michael`, `bf_emma`, `bf_isabella`, `bm_george`, `bm_lewis`.
|
available voices are `af`, `af_bella`, `af_nicole`, `af_sarah`, `af_sky`, `am_adam`, `am_michael`, `bf_emma`, `bf_isabella`, `bm_george`, `bm_lewis`.
|
||||||
You can try them here: [https://huggingface.co/spaces/hexgrad/Kokoro-TTS](https://huggingface.co/spaces/hexgrad/Kokoro-TTS)
|
You can try them here: [https://huggingface.co/spaces/hexgrad/Kokoro-TTS](https://huggingface.co/spaces/hexgrad/Kokoro-TTS)
|
||||||
|
|
||||||
|
|
||||||
|
## How to run on GPU
|
||||||
|
By default audiblez runs on CPU, because only the CPU-enabled ONNX Runtime is installed. If you want to use a GPU for faster performance, install the GPU-enabled ONNX Runtime manually and specify a runtime provider with the `--providers` flag.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install onnxruntime-gpu
|
||||||
|
```
|
||||||
|
|
||||||
|
To specify ONNX providers, for example to use an NVIDIA GPU, use the `--providers` flag. For example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider
|
||||||
|
```
|
||||||
|
|
||||||
|
To see the list of available providers on your system, run the following:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
audiblez --help
|
||||||
|
```
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -c "import onnxruntime as ort; print(ort.get_available_providers())"
|
||||||
|
```
|
||||||
|
|
||||||
|
This will display the ONNX providers that can be used, such as `CUDAExecutionProvider` for NVIDIA GPUs or `CPUExecutionProvider` for CPU-only execution.
|
||||||
|
|
||||||
|
You can specify a provider hierarchy by listing multiple providers, in order of preference, separated by spaces.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider CPUExecutionProvider
|
||||||
|
```
|
||||||
|
|
||||||
## Author
|
## Author
|
||||||
by [Claudio Santini](https://claudio.uk) in 2025, distributed under MIT licence.
|
by [Claudio Santini](https://claudio.uk) in 2025, distributed under MIT licence.
|
||||||
|
|
||||||
|
|
129
audiblez.py
129
audiblez.py
|
@ -15,18 +15,39 @@ import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from string import Formatter
|
from string import Formatter
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from kokoro_onnx import config
|
||||||
from kokoro_onnx import Kokoro
|
from kokoro_onnx import Kokoro
|
||||||
from ebooklib import epub
|
from ebooklib import epub
|
||||||
from pydub import AudioSegment
|
from pydub import AudioSegment
|
||||||
from pick import pick
|
from pick import pick
|
||||||
|
import onnxruntime as ort
|
||||||
|
from tempfile import NamedTemporaryFile
|
||||||
|
|
||||||
|
config.MAX_PHONEME_LENGTH = 128
|
||||||
|
|
||||||
|
|
||||||
def main(kokoro, file_path, lang, voice, pick_manually, speed):
|
def main(kokoro, file_path, lang, voice, pick_manually, speed, providers):
|
||||||
|
# Set ONNX providers if specified
|
||||||
|
if providers:
|
||||||
|
available_providers = ort.get_available_providers()
|
||||||
|
invalid_providers = [p for p in providers if p not in available_providers]
|
||||||
|
if invalid_providers:
|
||||||
|
print(f"Invalid ONNX providers: {', '.join(invalid_providers)}")
|
||||||
|
print(f"Available providers: {', '.join(available_providers)}")
|
||||||
|
sys.exit(1)
|
||||||
|
kokoro.sess.set_providers(providers)
|
||||||
|
print(f"Using ONNX providers: {', '.join(providers)}")
|
||||||
filename = Path(file_path).name
|
filename = Path(file_path).name
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
book = epub.read_epub(file_path)
|
book = epub.read_epub(file_path)
|
||||||
title = book.get_metadata('DC', 'title')[0][0]
|
title = book.get_metadata('DC', 'title')[0][0]
|
||||||
creator = book.get_metadata('DC', 'creator')[0][0]
|
creator = book.get_metadata('DC', 'creator')[0][0]
|
||||||
|
|
||||||
|
cover_maybe = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_COVER]
|
||||||
|
cover_image = cover_maybe[0].get_content() if cover_maybe else b""
|
||||||
|
if cover_maybe:
|
||||||
|
print(f'Found cover image {cover_maybe[0].file_name} in {cover_maybe[0].media_type} format')
|
||||||
|
|
||||||
intro = f'{title} by {creator}'
|
intro = f'{title} by {creator}'
|
||||||
print(intro)
|
print(intro)
|
||||||
print('Found Chapters:', [c.get_name() for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT])
|
print('Found Chapters:', [c.get_name() for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT])
|
||||||
|
@ -36,18 +57,22 @@ def main(kokoro, file_path, lang, voice, pick_manually, speed):
|
||||||
chapters = find_chapters(book)
|
chapters = find_chapters(book)
|
||||||
print('Selected chapters:', [c.get_name() for c in chapters])
|
print('Selected chapters:', [c.get_name() for c in chapters])
|
||||||
texts = extract_texts(chapters)
|
texts = extract_texts(chapters)
|
||||||
|
|
||||||
has_ffmpeg = shutil.which('ffmpeg') is not None
|
has_ffmpeg = shutil.which('ffmpeg') is not None
|
||||||
if not has_ffmpeg:
|
if not has_ffmpeg:
|
||||||
print('\033[91m' + 'ffmpeg not found. Please install ffmpeg to create mp3 and m4b audiobook files.' + '\033[0m')
|
print('\033[91m' + 'ffmpeg not found. Please install ffmpeg to create mp3 and m4b audiobook files.' + '\033[0m')
|
||||||
total_chars = sum([len(t) for t in texts])
|
|
||||||
|
total_chars, processed_chars = sum(map(len, texts)), 0
|
||||||
print('Started at:', time.strftime('%H:%M:%S'))
|
print('Started at:', time.strftime('%H:%M:%S'))
|
||||||
print(f'Total characters: {total_chars:,}')
|
print(f'Total characters: {total_chars:,}')
|
||||||
print('Total words:', len(' '.join(texts).split(' ')))
|
print('Total words:', len(' '.join(texts).split()))
|
||||||
|
|
||||||
i = 1
|
|
||||||
chapter_mp3_files = []
|
chapter_mp3_files = []
|
||||||
for text in texts:
|
durations = {}
|
||||||
if len(text) == 0:
|
|
||||||
|
for i, text in enumerate(texts, start=1):
|
||||||
|
if len(text.strip()) < 10:
|
||||||
|
print(f'Skipping empty chapter {i}')
|
||||||
continue
|
continue
|
||||||
chapter_filename = filename.replace('.epub', f'_chapter_{i}.wav')
|
chapter_filename = filename.replace('.epub', f'_chapter_{i}.wav')
|
||||||
chapter_mp3_files.append(chapter_filename)
|
chapter_mp3_files.append(chapter_filename)
|
||||||
|
@ -60,25 +85,30 @@ def main(kokoro, file_path, lang, voice, pick_manually, speed):
|
||||||
i += 1
|
i += 1
|
||||||
chapter_mp3_files.remove(chapter_filename)
|
chapter_mp3_files.remove(chapter_filename)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(f'Reading chapter {i} ({len(text):,} characters)...')
|
print(f'Reading chapter {i} ({len(text):,} characters)...')
|
||||||
if i == 1:
|
if i == 1:
|
||||||
text = intro + '.\n\n' + text
|
text = intro + '.\n\n' + text
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
samples, sample_rate = kokoro.create(text, voice=voice, speed=speed, lang=lang)
|
samples, sample_rate = kokoro.create(text, voice=voice, speed=speed, lang=lang)
|
||||||
sf.write(f'{chapter_filename}', samples, sample_rate)
|
sf.write(f'{chapter_filename}', samples, sample_rate)
|
||||||
|
durations[chapter_filename] = len(samples)/sample_rate
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
delta_seconds = end_time - start_time
|
delta_seconds = end_time - start_time
|
||||||
chars_per_sec = len(text) / delta_seconds
|
chars_per_sec = len(text) / delta_seconds
|
||||||
remaining_chars = sum([len(t) for t in texts[i - 1:]])
|
processed_chars += len(text)
|
||||||
|
remaining_chars = total_chars - processed_chars
|
||||||
remaining_time = remaining_chars / chars_per_sec
|
remaining_time = remaining_chars / chars_per_sec
|
||||||
print(f'Estimated time remaining: {strfdelta(remaining_time)}')
|
print(f'Estimated time remaining: {strfdelta(remaining_time)}')
|
||||||
print('Chapter written to', chapter_filename)
|
print('Chapter written to', chapter_filename)
|
||||||
print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)')
|
print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)')
|
||||||
progress = int((total_chars - remaining_chars) / total_chars * 100)
|
progress = processed_chars * 100 // total_chars
|
||||||
print('Progress:', f'{progress}%')
|
print('Progress:', f'{progress}%')
|
||||||
i += 1
|
|
||||||
if has_ffmpeg:
|
if has_ffmpeg:
|
||||||
create_m4b(chapter_mp3_files, filename, title, creator)
|
create_index_file(title, creator, chapter_mp3_files, durations)
|
||||||
|
create_m4b(chapter_mp3_files, filename, title, creator, cover_image)
|
||||||
|
|
||||||
|
|
||||||
def extract_texts(chapters):
|
def extract_texts(chapters):
|
||||||
|
@ -98,17 +128,12 @@ def extract_texts(chapters):
|
||||||
|
|
||||||
def is_chapter(c):
    """Return True when the item's name looks like a chapter file.

    The name returned by ``c.get_name()`` is compared case-insensitively.
    It qualifies when it contains the word "chapter", or one of the
    prefixes "part", "chap" or "ch" immediately followed by a digit.

    Always returns a real ``bool`` (never ``None`` or a match object), so
    the result is safe to use in filters and comparisons.
    """
    name = c.get_name().lower()
    if 'chapter' in name:
        return True
    # With search(), r'part\d{1,3}' matches exactly when r'part\d' does, so a
    # single alternation covers the three prefix patterns.
    return re.search(r'(?:part|chap|ch)\d', name) is not None
|
|
||||||
|
|
||||||
|
|
||||||
def find_chapters(book, verbose=False):
|
def find_chapters(book, verbose=False):
|
||||||
|
@ -146,8 +171,8 @@ def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
|
||||||
return f.format(fmt, **values)
|
return f.format(fmt, **values)
|
||||||
|
|
||||||
|
|
||||||
def create_m4b(chapter_files, filename, title, author):
|
def create_m4b(chapter_files, filename, title, author, cover_image):
|
||||||
tmp_filename = filename.replace('.epub', '.tmp.m4a')
|
tmp_filename = filename.replace('.epub', '.tmp.mp4')
|
||||||
if not Path(tmp_filename).exists():
|
if not Path(tmp_filename).exists():
|
||||||
combined_audio = AudioSegment.empty()
|
combined_audio = AudioSegment.empty()
|
||||||
for wav_file in chapter_files:
|
for wav_file in chapter_files:
|
||||||
|
@ -157,10 +182,28 @@ def create_m4b(chapter_files, filename, title, author):
|
||||||
combined_audio.export(tmp_filename, format="mp4", codec="aac", bitrate="64k")
|
combined_audio.export(tmp_filename, format="mp4", codec="aac", bitrate="64k")
|
||||||
final_filename = filename.replace('.epub', '.m4b')
|
final_filename = filename.replace('.epub', '.m4b')
|
||||||
print('Creating M4B file...')
|
print('Creating M4B file...')
|
||||||
|
|
||||||
|
if cover_image:
|
||||||
|
cover_image_file = NamedTemporaryFile("wb")
|
||||||
|
cover_image_file.write(cover_image)
|
||||||
|
cover_image_args = ["-i", cover_image_file.name, "-map", "0:a", "-map", "1:v"]
|
||||||
|
else:
|
||||||
|
cover_image_args = []
|
||||||
|
|
||||||
proc = subprocess.run([
|
proc = subprocess.run([
|
||||||
'ffmpeg', '-i', f'{tmp_filename}', '-c', 'copy', '-f', 'mp4',
|
'ffmpeg',
|
||||||
'-metadata', f'title={title}',
|
'-i', f'{tmp_filename}',
|
||||||
'-metadata', f'author={author}',
|
'-i', 'chapters.txt',
|
||||||
|
#'-map', '0',
|
||||||
|
#'-map_metadata', '1',
|
||||||
|
*cover_image_args,
|
||||||
|
'-c:a', 'copy',
|
||||||
|
'-c:v', 'copy',
|
||||||
|
'-disposition:v', 'attached_pic',
|
||||||
|
'-metadata:s:v', f'title={title}',
|
||||||
|
'-metadata', f'artist={author}',
|
||||||
|
'-c', 'copy',
|
||||||
|
'-f', 'mp4',
|
||||||
f'{final_filename}'
|
f'{final_filename}'
|
||||||
])
|
])
|
||||||
Path(tmp_filename).unlink()
|
Path(tmp_filename).unlink()
|
||||||
|
@ -168,19 +211,45 @@ def create_m4b(chapter_files, filename, title, author):
|
||||||
print(f'{final_filename} created. Enjoy your audiobook.')
|
print(f'{final_filename} created. Enjoy your audiobook.')
|
||||||
print('Feel free to delete the intermediary .wav chapter files, the .m4b is all you need.')
|
print('Feel free to delete the intermediary .wav chapter files, the .m4b is all you need.')
|
||||||
|
|
||||||
|
def probe_duration(file_name):
    """Ask ffprobe for the container duration of ``file_name`` and return it as a float.

    Raises ``subprocess.CalledProcessError`` when ffprobe exits with a
    non-zero status, and ``ValueError`` when its output is not a number.
    """
    command = [
        'ffprobe',
        '-i', file_name,
        '-show_entries', 'format=duration',
        '-v', 'quiet',
        '-of', 'default=noprint_wrappers=1:nokey=1',
    ]
    result = subprocess.run(command, capture_output=True, text=True, check=True)
    # With the wrappers and key suppressed, stdout holds just the bare value.
    duration_text = result.stdout.strip()
    return float(duration_text)
|
||||||
|
|
||||||
|
def _escape_ffmetadata(value):
    """Backslash-escape the characters that are special in an FFMETADATA file."""
    return re.sub(r'([=;#\\\n])', r'\\\1', str(value))


def create_index_file(title, creator, chapter_mp3_files, durations):
    """Write ``chapters.txt``, an FFMETADATA file with one chapter per audio file.

    Args:
        title: Book title, written as the global ``title`` tag.
        creator: Book author, written as the global ``artist`` tag.
        chapter_mp3_files: Chapter audio file names, in playback order.
        durations: Mapping of file name to duration in seconds. Files missing
            from the mapping are measured with ``probe_duration`` and the
            result is stored back into the mapping.

    Chapter boundaries are expressed in milliseconds (``TIMEBASE=1/1000``);
    each chapter starts where the previous one ended.
    """
    # Explicit UTF-8 so non-ASCII titles do not depend on the platform's
    # default encoding.
    with open("chapters.txt", "w", encoding="utf-8") as f:
        # '=', ';', '#', '\' and newlines must be escaped, otherwise a title
        # such as "A; B" would corrupt the metadata file.
        f.write(
            ";FFMETADATA1\n"
            f"title={_escape_ffmetadata(title)}\n"
            f"artist={_escape_ffmetadata(creator)}\n\n"
        )
        start = 0
        for i, c in enumerate(chapter_mp3_files):
            if c not in durations:
                durations[c] = probe_duration(c)
            end = start + int(durations[c] * 1000)
            f.write(f"[CHAPTER]\nTIMEBASE=1/1000\nSTART={start}\nEND={end}\ntitle=Chapter {i}\n\n")
            start = end
|
||||||
|
|
||||||
|
|
||||||
def cli_main():
|
def cli_main():
|
||||||
if not Path('kokoro-v0_19.onnx').exists() or not Path('voices.json').exists():
|
MODEL_NAME = 'kokoro-v0_19.onnx'
|
||||||
|
CUDA_PROVIDER = "CUDAExecutionProvider"
|
||||||
|
VOICES = 'voices.json'
|
||||||
|
if not Path(MODEL_NAME).exists() or not Path(VOICES).exists():
|
||||||
print('Error: kokoro-v0_19.onnx and voices.json must be in the current directory. Please download them with:')
|
print('Error: kokoro-v0_19.onnx and voices.json must be in the current directory. Please download them with:')
|
||||||
print('wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx')
|
print('wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx')
|
||||||
print('wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.json')
|
print('wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.json')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
kokoro = Kokoro('kokoro-v0_19.onnx', 'voices.json')
|
kokoro = Kokoro(MODEL_NAME, VOICES)
|
||||||
voices = list(kokoro.get_voices())
|
voices = list(kokoro.get_voices())
|
||||||
voices_str = ', '.join(voices)
|
voices_str = ', '.join(voices)
|
||||||
epilog = 'example:\n' + \
|
epilog = 'example:\n' + \
|
||||||
' audiblez book.epub -l en-us -v af_sky'
|
' audiblez book.epub -l en-us -v af_sky'
|
||||||
default_voice = 'af_sky' if 'af_sky' in voices else voices[0]
|
default_voice = 'af_sky' if 'af_sky' in voices else voices[0]
|
||||||
|
|
||||||
|
# Get available ONNX providers
|
||||||
|
available_providers = ort.get_available_providers()
|
||||||
|
providers_help = f"Available ONNX providers: {', '.join(available_providers)}"
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)
|
parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)
|
||||||
parser.add_argument('epub_file_path', help='Path to the epub file')
|
parser.add_argument('epub_file_path', help='Path to the epub file')
|
||||||
parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
|
parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
|
||||||
|
@ -188,11 +257,13 @@ def cli_main():
|
||||||
parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook',
|
parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook',
|
||||||
action='store_true')
|
action='store_true')
|
||||||
parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float)
|
parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float)
|
||||||
|
parser.add_argument('--providers', nargs='+', metavar='PROVIDER', help=f"Specify ONNX providers. {providers_help}")
|
||||||
|
|
||||||
if len(sys.argv) == 1:
|
if len(sys.argv) == 1:
|
||||||
parser.print_help(sys.stderr)
|
parser.print_help(sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed)
|
main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed, args.providers)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[project]
|
[project]
|
||||||
name = "audiblez"
|
name = "audiblez"
|
||||||
version = "0.1.12"
|
version = "0.2.0"
|
||||||
description = "Generate audiobooks from e-books (epub to wav/m4b)"
|
description = "Generate audiobooks from e-books (epub to wav/m4b)"
|
||||||
authors = [
|
authors = [
|
||||||
{ name = "Claudio Santini", email = "hireclaudio@gmail.com" }
|
{ name = "Claudio Santini", email = "hireclaudio@gmail.com" }
|
||||||
|
|
Loading…
Add table
Reference in a new issue