Support for onnxruntime-gpu and documentation.

2025-04-13 09:38:57 +00:00 · 2025-01-16 20:01:16 -08:00 · 2025-01-16 20:01:16 -08:00 · cf64b00b51
commit cf64b00b51
parent 173e2fb4c9
3 changed files with 57 additions and 4 deletions
--- a/.gitignore
+++ b/.gitignore
@ -8,3 +8,4 @@ epub
 *.json
 *.onnx
 dist
 .venv
--- a/README.md
+++ b/README.md
@ -36,11 +36,44 @@ and at the end it will produce a `book.m4b` file with the whole book you can lis
 audiobook player.
 It will only produce the `.m4b` file if you have `ffmpeg` installed on your machine.
 ### Using the `--providers` option for ONNX
 If you want to use a GPU for faster performance, install the GPU-enabled ONNX Runtime and specify a runtime provider with the `--providers` flag. By default, the CPU-enabled ONNX Runtime is installed. The GPU runtime must be installed manually.
 ```bash
 pip install onnxruntime-gpu
 ```
 To specify ONNX providers, such as using an NVIDIA GPU, use the `--providers` flag. For example:
 ```bash
 audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider
 ```
 To see the list of available providers on your system, run the following:
 ```bash
 audiblez --help
 ```
 or
 ```bash
 python -c "import onnxruntime as ort; print(ort.get_available_providers())"
 ```
 This will display the ONNX providers that can be used, such as `CUDAExecutionProvider` for NVIDIA GPUs or `CPUExecutionProvider` for CPU-only execution.
 You can specify a provider hierarchy by listing multiple providers separated by spaces, in order of preference:
 ```bash
 audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider CPUExecutionProvider
 ```
 ## Supported Languages
 Use `-l` option to specify the language, available language codes are:
 🇺🇸 `en-us`, 🇬🇧 `en-gb`, 🇫🇷 `fr-fr`, 🇯🇵 `ja`, 🇰🇷 `ko` and 🇨🇳 `cmn`.
 ## Speed
 By default the audio is generated at normal speed, but you can make it up to twice as slow or twice as fast by specifying a speed argument between 0.5 and 2.0:
--- a/audiblez.py
+++ b/audiblez.py
@ -19,9 +19,21 @@ from kokoro_onnx import Kokoro
 from ebooklib import epub
 from pydub import AudioSegment
 from pick import pick
 import onnxruntime as ort
-def main(kokoro, file_path, lang, voice, pick_manually, speed):
+def main(kokoro, file_path, lang, voice, pick_manually, speed, providers):
    # Set ONNX providers if specified
    if providers:
        available_providers = ort.get_available_providers()
        invalid_providers = [p for p in providers if p not in available_providers]
        if invalid_providers:
            print(f"Invalid ONNX providers: {', '.join(invalid_providers)}")
            print(f"Available providers: {', '.join(available_providers)}")
            sys.exit(1)
        kokoro.sess.set_providers(providers)
        print(f"Using ONNX providers: {', '.join(providers)}")
    filename = Path(file_path).name
    with warnings.catch_warnings():
        book = epub.read_epub(file_path)
@ -180,6 +192,11 @@ def cli_main():
    epilog = 'example:\n' + \
             '  audiblez book.epub -l en-us -v af_sky'
    default_voice = 'af_sky' if 'af_sky' in voices else voices[0]
    # Get available ONNX providers
    available_providers = ort.get_available_providers()
    providers_help = f"Available ONNX providers: {', '.join(available_providers)}"
    parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('epub_file_path', help='Path to the epub file')
    parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
@ -187,11 +204,13 @@ def cli_main():
    parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook',
                        action='store_true')
    parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float)
    parser.add_argument('--providers', nargs='+', metavar='PROVIDER', help=f"Specify ONNX providers. {providers_help}")
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
-    main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed)
+    main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed, args.providers)
 if __name__ == '__main__':