From cf64b00b5133158fc9eb3fcd8eeca351ebbf6c54 Mon Sep 17 00:00:00 2001 From: Pat Brown Date: Thu, 16 Jan 2025 20:01:16 -0800 Subject: [PATCH] Support for onnxruntime-gpu and documentation. --- .gitignore | 1 + README.md | 37 +++++++++++++++++++++++++++++++++++-- audiblez.py | 23 +++++++++++++++++++++-- 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 872316b..9dc7a0f 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ epub *.json *.onnx dist +.venv diff --git a/README.md b/README.md index 5102c88..730d5fb 100644 --- a/README.md +++ b/README.md @@ -33,14 +33,47 @@ audiblez book.epub -l en-gb -v af_sky It will first create a bunch of `book_chapter_1.wav`, `book_chapter_2.wav`, etc. files in the same directory, and at the end it will produce a `book.m4b` file with the whole book you can listen with VLC or any - audiobook player. +audiobook player. It will only produce the `.m4b` file if you have `ffmpeg` installed on your machine. +### Using the `--providers` option for ONNX + +If you want to use a GPU for faster performance, install the GPU-enabled ONNX Runtime and specify a runtime provider with the `--providers` flag. By default, the CPU-enabled ONNX Runtime is installed. The GPU runtime must be installed manually. + +```bash +pip install onnxruntime-gpu +``` + +To specify ONNX providers, such as using an NVIDIA GPU, use the `--providers` flag. For example: + +```bash +audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider +``` + +To see the list of available providers on your system, run the following: + +```bash +audiblez --help +``` + +or + +```bash +python -c "import onnxruntime as ort; print(ort.get_available_providers())" +``` + +This will display the ONNX providers that can be used, such as `CUDAExecutionProvider` for NVIDIA GPUs or `CPUExecutionProvider` for CPU-only execution. + +You can specify a provider hierarchy by listing multiple providers separated by spaces, in priority order. 
+ ```bash +audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider CPUExecutionProvider +``` + ## Supported Languages Use `-l` option to specify the language, available language codes are: 🇺🇸 `en-us`, 🇬🇧 `en-gb`, 🇫🇷 `fr-fr`, 🇯🇵 `ja`, 🇰🇷 `kr` and 🇨🇳 `cmn`. - ## Speed By default the audio is generated using a normal speed, but you can make it up to twice slower or faster by specifying a speed argument between 0.5 to 2.0: diff --git a/audiblez.py b/audiblez.py index cd5f3f7..25738fe 100755 --- a/audiblez.py +++ b/audiblez.py @@ -19,9 +19,21 @@ from kokoro_onnx import Kokoro from ebooklib import epub from pydub import AudioSegment from pick import pick +import onnxruntime as ort -def main(kokoro, file_path, lang, voice, pick_manually, speed): +def main(kokoro, file_path, lang, voice, pick_manually, speed, providers): + # Set ONNX providers if specified + if providers: + available_providers = ort.get_available_providers() + invalid_providers = [p for p in providers if p not in available_providers] + if invalid_providers: + print(f"Invalid ONNX providers: {', '.join(invalid_providers)}") + print(f"Available providers: {', '.join(available_providers)}") + sys.exit(1) + kokoro.sess.set_providers(providers) + print(f"Using ONNX providers: {', '.join(providers)}") + filename = Path(file_path).name with warnings.catch_warnings(): book = epub.read_epub(file_path) @@ -180,6 +192,11 @@ def cli_main(): epilog = 'example:\n' + \ ' audiblez book.epub -l en-us -v af_sky' default_voice = 'af_sky' if 'af_sky' in voices else voices[0] + + # Get available ONNX providers + available_providers = ort.get_available_providers() + providers_help = f"Available ONNX providers: {', '.join(available_providers)}" + parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('epub_file_path', help='Path to the epub file') parser.add_argument('-l', '--lang', default='en-gb', 
help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn') @@ -187,11 +204,13 @@ def cli_main(): parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook', action='store_true') parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float) + parser.add_argument('--providers', nargs='+', metavar='PROVIDER', help=f"Specify ONNX providers. {providers_help}") + if len(sys.argv) == 1: parser.print_help(sys.stderr) sys.exit(1) args = parser.parse_args() - main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed) + main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed, args.providers) if __name__ == '__main__':