This commit is contained in:
Claudio Santini 2025-01-30 09:24:21 +01:00
parent 12fbf89fcc
commit bf09d6ad82
3 changed files with 23 additions and 20 deletions

View file

@ -57,14 +57,14 @@ You can try them here: [https://huggingface.co/spaces/hexgrad/Kokoro-TTS](https:
## How to run on GPU
By default audiblez runs on CPU. If it finds CUDA available as Torch device it tries to use it.
Experimental support for CUDA is available.
We don't currently support Apple Silicon, as there is not yet a Kokoro implementation in MLX. As soon as it becomes available, we will support it.
## How to run on Google Colab
By default audiblez runs on CPU. If you pass the option `--cuda` it will try to use the Cuda device via Torch.
Check out this example: [Audiblez running on a Google Colab Notebook with Cuda](https://colab.research.google.com/drive/164PQLowogprWQpRjKk33e-8IORAvqXKI?usp=sharing).
We don't currently support Apple Silicon, as there is not yet a Kokoro implementation in MLX. As soon as it becomes available, we will support it.
## Author
by [Claudio Santini](https://claudio.uk) in 2025, distributed under MIT licence.

View file

@ -67,8 +67,6 @@ def main(pipeline, file_path, voice, pick_manually, speed):
print(f'Estimated time remaining (assuming {chars_per_sec} chars/sec): {strfdelta((total_chars - processed_chars) / chars_per_sec)}')
chapter_mp3_files = []
durations = {}
for i, text in enumerate(texts, start=1):
chapter_filename = filename.replace('.epub', f'_chapter_{i}.wav')
chapter_mp3_files.append(chapter_filename)
@ -97,8 +95,8 @@ def main(pipeline, file_path, voice, pick_manually, speed):
print('Progress:', f'{progress}%\n')
if has_ffmpeg:
create_index_file(title, creator, chapter_mp3_files, durations)
create_m4b(chapter_mp3_files, filename, title, creator, cover_image)
create_index_file(title, creator, chapter_mp3_files)
create_m4b(chapter_mp3_files, filename, cover_image)
def extract_texts(chapters):
@ -132,7 +130,6 @@ def find_chapters(book, verbose=False):
for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT:
print(f"'{item.get_name()}'" + ', #' + str(len(item.get_body_content())))
# print(f'{item.get_name()}'.ljust(60), str(len(item.get_body_content())).ljust(15), 'X' if item in chapters else '-')
if len(chapters) == 0:
print('Not easy to find the chapters, defaulting to all available documents.')
chapters = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT]
@ -161,7 +158,7 @@ def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
return f.format(fmt, **values)
def create_m4b(chapter_files, filename, title, author, cover_image):
def create_m4b(chapter_files, filename, cover_image):
tmp_filename = filename.replace('.epub', '.tmp.mp4')
if not Path(tmp_filename).exists():
combined_audio = AudioSegment.empty()
@ -206,15 +203,14 @@ def probe_duration(file_name):
return float(proc.stdout.strip())
def create_index_file(title, creator, chapter_mp3_files):
    """Write an ffmpeg metadata chapter index to 'chapters.txt'.

    Probes each chapter file's duration with ffprobe and writes cumulative
    START/END timestamps in milliseconds (TIMEBASE=1/1000) in FFMETADATA1
    format, suitable for muxing chapter marks into the final m4b.

    Args:
        title: audiobook title, written to the metadata header.
        creator: author name, written as the 'artist' tag.
        chapter_mp3_files: ordered list of per-chapter audio file paths.
    """
    with open("chapters.txt", "w") as f:
        f.write(f";FFMETADATA1\ntitle={title}\nartist={creator}\n\n")
        start = 0
        # Number chapters from 1 to stay consistent with the
        # '*_chapter_<i>.wav' file naming (enumerate(texts, start=1) in main).
        for i, c in enumerate(chapter_mp3_files, start=1):
            duration = probe_duration(c)
            end = start + int(duration * 1000)
            f.write(f"[CHAPTER]\nTIMEBASE=1/1000\nSTART={start}\nEND={end}\ntitle=Chapter {i}\n\n")
            start = end
@ -231,12 +227,14 @@ def cli_main():
parser.add_argument('-v', '--voice', default=default_voice, help=f'Choose narrating voice: {voices_str}')
parser.add_argument('-p', '--pick', default=False, help=f'Interactively select which chapters to read in the audiobook', action='store_true')
parser.add_argument('-s', '--speed', default=1.0, help=f'Set speed from 0.5 to 2.0', type=float)
parser.add_argument('-c', '--cuda', default=False, help=f'Use GPU via Cuda in Torch if available', type=float)
if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
args = parser.parse_args()
if args.cuda:
if torch.cuda.is_available():
print('CUDA GPU available')
torch.set_default_device('cuda')

View file

@ -8,7 +8,7 @@ from audiblez import main
class MainTest(unittest.TestCase):
def base(self, file_path, **kwargs):
    """Run the full epub-to-audiobook conversion for *file_path*.

    Builds a Kokoro pipeline ('a' = American English; 'b' would be British)
    and invokes audiblez.main with the default 'af_sky' voice at normal speed.
    Extra keyword arguments are forwarded to main().
    """
    pipeline = KPipeline(lang_code='a')
    main(pipeline, file_path=file_path, voice='af_sky', pick_manually=False, speed=1, **kwargs)
# def test_0_txt(self):
@ -30,3 +30,8 @@ class MainTest(unittest.TestCase):
# Path('gene.m4b').unlink(missing_ok=True)
# self.base(file_path='../epub/gene.epub')
# self.assertTrue(Path('gene.m4b').exists())
def test_orwell(self):
    """End-to-end check: converting orwell.epub must produce orwell.m4b."""
    output = Path('orwell.m4b')
    output.unlink(missing_ok=True)  # start from a clean slate
    self.base(file_path='../epub/orwell.epub')
    self.assertTrue(output.exists())