mirror of
https://github.com/santinic/audiblez.git
synced 2025-04-13 09:38:57 +00:00
Support for onnxruntime-gpu and documentation.
This commit is contained in:
parent
173e2fb4c9
commit
cf64b00b51
3 changed files with 57 additions and 4 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -8,3 +8,4 @@ epub
|
|||
*.json
|
||||
*.onnx
|
||||
dist
|
||||
.venv
|
||||
|
|
37
README.md
37
README.md
|
@ -33,14 +33,47 @@ audiblez book.epub -l en-gb -v af_sky
|
|||
|
||||
It will first create a bunch of `book_chapter_1.wav`, `book_chapter_2.wav`, etc. files in the same directory,
|
||||
and at the end it will produce a `book.m4b` file with the whole book, which you can listen to with VLC or any
|
||||
audiobook player.
|
||||
audiobook player.
|
||||
It will only produce the `.m4b` file if you have `ffmpeg` installed on your machine.
|
||||
|
||||
### Using the `--providers` option for ONNX
|
||||
|
||||
If you want to use a GPU for faster performance, install the GPU-enabled ONNX Runtime and specify a runtime provider with the `--providers` flag. By default, the CPU-enabled ONNX Runtime is installed. The GPU runtime must be installed manually.
|
||||
|
||||
```bash
|
||||
pip install onnxruntime-gpu
|
||||
```
|
||||
|
||||
To specify ONNX providers, for example to run on an NVIDIA GPU, use the `--providers` flag. For example:
|
||||
|
||||
```bash
|
||||
audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider
|
||||
```
|
||||
|
||||
To see the list of available providers on your system, run the following:
|
||||
|
||||
```bash
|
||||
audiblez --help
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
python -c "import onnxruntime as ort; print(ort.get_available_providers())"
|
||||
```
|
||||
|
||||
This will display the ONNX providers that can be used, such as `CUDAExecutionProvider` for NVIDIA GPUs or `CPUExecutionProvider` for CPU-only execution.
|
||||
|
||||
You can specify a provider hierarchy by listing multiple providers separated by spaces, in order of priority.
|
||||
|
||||
```bash
|
||||
audiblez book.epub -l en-gb -v af_sky --providers CUDAExecutionProvider CPUExecutionProvider
|
||||
```
|
||||
|
||||
## Supported Languages
|
||||
Use `-l` option to specify the language, available language codes are:
|
||||
🇺🇸 `en-us`, 🇬🇧 `en-gb`, 🇫🇷 `fr-fr`, 🇯🇵 `ja`, 🇰🇷 `ko` and 🇨🇳 `cmn`.
|
||||
|
||||
|
||||
## Speed
|
||||
By default the audio is generated at normal speed, but you can make it up to two times slower or faster by specifying a speed argument between 0.5 and 2.0:
|
||||
|
||||
|
|
23
audiblez.py
23
audiblez.py
|
@ -19,9 +19,21 @@ from kokoro_onnx import Kokoro
|
|||
from ebooklib import epub
|
||||
from pydub import AudioSegment
|
||||
from pick import pick
|
||||
import onnxruntime as ort
|
||||
|
||||
|
||||
def main(kokoro, file_path, lang, voice, pick_manually, speed):
|
||||
def main(kokoro, file_path, lang, voice, pick_manually, speed, providers):
|
||||
# Set ONNX providers if specified
|
||||
if providers:
|
||||
available_providers = ort.get_available_providers()
|
||||
invalid_providers = [p for p in providers if p not in available_providers]
|
||||
if invalid_providers:
|
||||
print(f"Invalid ONNX providers: {', '.join(invalid_providers)}")
|
||||
print(f"Available providers: {', '.join(available_providers)}")
|
||||
sys.exit(1)
|
||||
kokoro.sess.set_providers(providers)
|
||||
print(f"Using ONNX providers: {', '.join(providers)}")
|
||||
|
||||
filename = Path(file_path).name
|
||||
with warnings.catch_warnings():
|
||||
book = epub.read_epub(file_path)
|
||||
|
@ -180,6 +192,11 @@ def cli_main():
|
|||
epilog = 'example:\n' + \
|
||||
' audiblez book.epub -l en-us -v af_sky'
|
||||
default_voice = 'af_sky' if 'af_sky' in voices else voices[0]
|
||||
|
||||
# Get available ONNX providers
|
||||
available_providers = ort.get_available_providers()
|
||||
providers_help = f"Available ONNX providers: {', '.join(available_providers)}"
|
||||
|
||||
parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter)
|
||||
parser.add_argument('epub_file_path', help='Path to the epub file')
|
||||
parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
|
||||
|
@ -187,11 +204,13 @@ def cli_main():
|
|||
parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook',
|
||||
action='store_true')
|
||||
parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float)
|
||||
parser.add_argument('--providers', nargs='+', metavar='PROVIDER', help=f"Specify ONNX providers. {providers_help}")
|
||||
|
||||
if len(sys.argv) == 1:
|
||||
parser.print_help(sys.stderr)
|
||||
sys.exit(1)
|
||||
args = parser.parse_args()
|
||||
main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed)
|
||||
main(kokoro, args.epub_file_path, args.lang, args.voice, args.pick, args.speed, args.providers)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Add table
Reference in a new issue