Support for onnxruntime-gpu and documentation.

This commit is contained in:
Pat Brown 2025-01-16 20:01:16 -08:00
parent 173e2fb4c9
commit cf64b00b51
3 changed files with 57 additions and 4 deletions

1
.gitignore vendored
View file

@ -8,3 +8,4 @@ epub
*.json
*.onnx
dist
.venv

View file

@ -33,14 +33,47 @@ audiblez book.epub -l en-gb -v af_sky
It will first create a bunch of `book_chapter_1.wav`, `book_chapter_2.wav`, etc. files in the same directory,
and at the end it will produce a `book.m4b` file with the whole book, which you can listen to with VLC or any
audiobook player.
audiobook player.
It will only produce the `.m4b` file if you have `ffmpeg` installed on your machine.
### Using the `--providers` option for ONNX
If you want to use a GPU for faster performance, install the GPU-enabled ONNX Runtime and specify a runtime provider with the `--providers` flag. By default, the CPU-enabled ONNX Runtime is installed. The GPU runtime must be installed manually.
```bash
pip install onnxruntime-gpu
```
To select one or more ONNX providers, such as the one for an NVIDIA GPU, use the `--providers` flag. For example:
```bash
audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider
```
To see the list of available providers on your system, run the following:
```bash
audiblez --help
```
or
```bash
python -c "import onnxruntime as ort; print(ort.get_available_providers())"
```
This will display the ONNX providers that can be used, such as `CUDAExecutionProvider` for NVIDIA GPUs or `CPUExecutionProvider` for CPU-only execution.
You can specify a provider hierarchy by passing multiple providers separated by spaces, listed in order of preference.
```bash
audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider CPUExecutionProvider
```
## Supported Languages
Use `-l` option to specify the language, available language codes are:
🇺🇸 `en-us`, 🇬🇧 `en-gb`, 🇫🇷 `fr-fr`, 🇯🇵 `ja`, 🇰🇷 `ko` and 🇨🇳 `cmn`.
## Speed
By default the audio is generated at normal speed, but you can make it up to twice as slow or twice as fast by specifying a speed argument between 0.5 and 2.0:

View file

@ -19,9 +19,21 @@ from kokoro_onnx import Kokoro
from ebooklib import epub
from pydub import AudioSegment
from pick import pick
import onnxruntime as ort
def main(kokoro, file_path, lang, voice, pick_manually, speed):
def main(kokoro, file_path, lang, voice, pick_manually, speed, providers):
# Set ONNX providers if specified
if providers:
available_providers = ort.get_available_providers()
invalid_providers = [p for p in providers if p not in available_providers]
if invalid_providers:
print(f"Invalid ONNX providers: {', '.join(invalid_providers)}")
print(f"Available providers: {', '.join(available_providers)}")
sys.exit(1)
kokoro.sess.set_providers(providers)
print(f"Using ONNX providers: {', '.join(providers)}")
filename = Path(file_path).name
with warnings.catch_warnings():
book = epub.read_epub(file_path)
@ -180,6 +192,11 @@ def cli_main():
epilog = 'example:\n' + \
' audiblez book.epub -l en-us -v af_sky'
default_voice = 'af_sky' if 'af_sky' in voices else voices[0]
# Get available ONNX providers
available_providers = ort.get_available_providers()
providers_help = f"Available ONNX providers: {', '.join(available_providers)}"
parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('epub_file_path', help='Path to the epub file')
parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
@ -187,11 +204,13 @@ def cli_main():
parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook',
action='store_true')
parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float)
parser.add_argument('--providers', nargs='+', metavar='PROVIDER', help=f"Specify ONNX providers. {providers_help}")
if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
args = parser.parse_args()
main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed)
main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed, args.providers)
if __name__ == '__main__':