Support for onnxruntime-gpu and documentation.

This commit is contained in:
Pat Brown 2025-01-16 20:01:16 -08:00
parent 173e2fb4c9
commit cf64b00b51
3 changed files with 57 additions and 4 deletions

1
.gitignore vendored
View file

@ -8,3 +8,4 @@ epub
*.json *.json
*.onnx *.onnx
dist dist
.venv

View file

@ -36,11 +36,44 @@ and at the end it will produce a `book.m4b` file with the whole book you can lis
audiobook player. audiobook player.
It will only produce the `.m4b` file if you have `ffmpeg` installed on your machine. It will only produce the `.m4b` file if you have `ffmpeg` installed on your machine.
### Using the `--providers` option for ONNX
If you want to use a GPU for faster performance, install the GPU-enabled ONNX Runtime and specify a runtime provider with the `--providers` flag. By default, only the CPU-enabled ONNX Runtime is installed; the GPU runtime must be installed manually:
```bash
pip install onnxruntime-gpu
```
To specify ONNX providers, such as using an NVIDIA GPU, use the `--providers` flag. For example:
```bash
audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider
```
To see the list of available providers on your system, run the following:
```bash
audiblez --help
```
or
```bash
python -c "import onnxruntime as ort; print(ort.get_available_providers())"
```
Either command displays the ONNX providers that can be used, such as `CUDAExecutionProvider` for NVIDIA GPUs or `CPUExecutionProvider` for CPU-only execution.
You can specify a provider hierarchy by listing multiple providers separated by spaces, in order of preference:
```bash
audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider CPUExecutionProvider
```
## Supported Languages ## Supported Languages
Use `-l` option to specify the language, available language codes are: Use `-l` option to specify the language, available language codes are:
🇺🇸 `en-us`, 🇬🇧 `en-gb`, 🇫🇷 `fr-fr`, 🇯🇵 `ja`, 🇰🇷 `kr` and 🇨🇳 `cmn`. 🇺🇸 `en-us`, 🇬🇧 `en-gb`, 🇫🇷 `fr-fr`, 🇯🇵 `ja`, 🇰🇷 `kr` and 🇨🇳 `cmn`.
## Speed ## Speed
By default the audio is generated using a normal speed, but you can make it up to twice slower or faster by specifying a speed argument between 0.5 to 2.0: By default the audio is generated using a normal speed, but you can make it up to twice slower or faster by specifying a speed argument between 0.5 to 2.0:

View file

@ -19,9 +19,21 @@ from kokoro_onnx import Kokoro
from ebooklib import epub from ebooklib import epub
from pydub import AudioSegment from pydub import AudioSegment
from pick import pick from pick import pick
import onnxruntime as ort
def main(kokoro, file_path, lang, voice, pick_manually, speed): def main(kokoro, file_path, lang, voice, pick_manually, speed, providers):
# Set ONNX providers if specified
if providers:
available_providers = ort.get_available_providers()
invalid_providers = [p for p in providers if p not in available_providers]
if invalid_providers:
print(f"Invalid ONNX providers: {', '.join(invalid_providers)}")
print(f"Available providers: {', '.join(available_providers)}")
sys.exit(1)
kokoro.sess.set_providers(providers)
print(f"Using ONNX providers: {', '.join(providers)}")
filename = Path(file_path).name filename = Path(file_path).name
with warnings.catch_warnings(): with warnings.catch_warnings():
book = epub.read_epub(file_path) book = epub.read_epub(file_path)
@ -180,6 +192,11 @@ def cli_main():
epilog = 'example:\n' + \ epilog = 'example:\n' + \
' audiblez book.epub -l en-us -v af_sky' ' audiblez book.epub -l en-us -v af_sky'
default_voice = 'af_sky' if 'af_sky' in voices else voices[0] default_voice = 'af_sky' if 'af_sky' in voices else voices[0]
# Get available ONNX providers
available_providers = ort.get_available_providers()
providers_help = f"Available ONNX providers: {', '.join(available_providers)}"
parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter) parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('epub_file_path', help='Path to the epub file') parser.add_argument('epub_file_path', help='Path to the epub file')
parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn') parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
@ -187,11 +204,13 @@ def cli_main():
parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook', parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook',
action='store_true') action='store_true')
parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float) parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float)
parser.add_argument('--providers', nargs='+', metavar='PROVIDER', help=f"Specify ONNX providers. {providers_help}")
if len(sys.argv) == 1: if len(sys.argv) == 1:
parser.print_help(sys.stderr) parser.print_help(sys.stderr)
sys.exit(1) sys.exit(1)
args = parser.parse_args() args = parser.parse_args()
main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed) main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed, args.providers)
if __name__ == '__main__': if __name__ == '__main__':