From db8fabf9cac51d09b3d546cc0cdaa023d274b98d Mon Sep 17 00:00:00 2001 From: Claudio Santini Date: Sun, 9 Feb 2025 18:32:55 +0100 Subject: [PATCH] it works --- README.md | 4 +- audiblez/core.py | 72 +++++++++++++------------- audiblez/ui.py | 130 ++++++++++++++++++++++++++++++----------------- poetry.lock | 32 +----------- pyproject.toml | 1 - 5 files changed, 125 insertions(+), 114 deletions(-) diff --git a/README.md b/README.md index c80e6a6..8338050 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,9 @@ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/audiblez) ![PyPI - Version](https://img.shields.io/pypi/v/audiblez) -### v3 Now with CUDA support and many more languages! +### v4 Now with Graphical interface, CUDA support, and many languages! + +![Audiblez GUI on MacOSX](./imgs/mac.png) Audiblez generates `.m4b` audiobooks from regular `.epub` e-books, using Kokoro's high-quality speech synthesis. diff --git a/audiblez/core.py b/audiblez/core.py index 997ca86..9055f07 100755 --- a/audiblez/core.py +++ b/audiblez/core.py @@ -2,8 +2,6 @@ # audiblez - A program to convert e-books into audiobooks using # Kokoro-82M model for high-quality text-to-speech synthesis. # by Claudio Santini 2025 - https://claudio.uk -from io import StringIO - import torch.cuda import spacy import ebooklib @@ -13,12 +11,12 @@ import time import shutil import subprocess import re - +from io import StringIO +from types import SimpleNamespace from markdown import Markdown from tabulate import tabulate from pathlib import Path from string import Formatter -from yaspin import yaspin from bs4 import BeautifulSoup from kokoro import KPipeline from ebooklib import epub @@ -34,6 +32,13 @@ def load_spacy(): spacy.cli.download("xx_ent_wiki_sm") +def print_progress(stats): + progress = stats.processed_chars * 100 // stats.total_chars + eta = strfdelta((stats.total_chars - stats.processed_chars) / stats.chars_per_sec) + print(f'Estimated time remaining: {eta}') + print('Progress:', f'{progress}%\n') + + def main(file_path, voice, pick_manually, speed, output_folder='.', max_chapters=None, max_sentences=None, selected_chapters=None, post_event=None): if post_event: post_event('CORE_STARTED') @@ -69,13 +74,15 @@ def main(file_path, voice, pick_manually, speed, output_folder='.', if not has_ffmpeg: print('\033[91m' + 'ffmpeg not found. Please install ffmpeg to create mp3 and m4b audiobook files.' + '\033[0m') - total_chars, processed_chars = sum(map(len, texts)), 0 + stats = SimpleNamespace( + total_chars=sum(map(len, texts)), + processed_chars=0, + chars_per_sec=500 if torch.cuda.is_available() else 50) print('Started at:', time.strftime('%H:%M:%S')) - print(f'Total characters: {total_chars:,}') + print(f'Total characters: {stats.total_chars:,}') print('Total words:', len(' '.join(texts).split())) - chars_per_sec = 500 if torch.cuda.is_available() else 50 - eta = strfdelta((total_chars - processed_chars) / chars_per_sec) - print(f'Estimated time remaining (assuming {chars_per_sec} chars/sec): {eta}') + eta = strfdelta((stats.total_chars - stats.processed_chars) / stats.chars_per_sec) + print(f'Estimated time remaining (assuming {stats.chars_per_sec} chars/sec): {eta}') chapter_wav_files = [] for i, chapter in enumerate(selected_chapters, start=1): @@ -86,10 +93,10 @@ def main(file_path, voice, pick_manually, speed, output_folder='.', chapter_wav_files.append(chapter_wav_path) if Path(chapter_wav_path).exists(): print(f'File for chapter {i} already exists. Skipping') - processed_chars += len(text) + stats.processed_chars += len(text) if post_event: post_event('CORE_CHAPTER_FINISHED', chapter_index=chapter.chapter_index) - post_event('CORE_PROGRESS', progress=processed_chars * 100 // total_chars) + post_event('CORE_PROGRESS', progress=stats.processed_chars * 100 // stats.total_chars) continue if len(text.strip()) < 10: print(f'Skipping empty chapter {i}') @@ -100,29 +107,21 @@ def main(file_path, voice, pick_manually, speed, output_folder='.', text = f'{title} – {creator}.\n\n' + text start_time = time.time() pipeline = KPipeline(lang_code=voice[0]) # a for american or b for british etc. - - with yaspin(text=f'Reading chapter {i} ({len(text):,} characters)...', color="yellow") as spinner: - if post_event: post_event('CORE_CHAPTER_STARTED', chapter_index=chapter.chapter_index) - audio_segments = gen_audio_segments(pipeline, text, voice, speed, max_sentences=max_sentences) - if audio_segments: - final_audio = np.concatenate(audio_segments) - soundfile.write(chapter_wav_path, final_audio, sample_rate) - end_time = time.time() - delta_seconds = end_time - start_time - chars_per_sec = len(text) / delta_seconds - processed_chars += len(text) - spinner.ok("✅") - print(f'Estimated time remaining: {strfdelta((total_chars - processed_chars) / chars_per_sec)}') - print('Chapter written to', chapter_wav_path) - if post_event: post_event('CORE_CHAPTER_FINISHED', chapter_index=chapter.chapter_index) - print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)') - progress = processed_chars * 100 // total_chars - print('Progress:', f'{progress}%\n') - if post_event: post_event('CORE_PROGRESS', progress=progress) - else: - spinner.fail("❌") - print(f'Warning: No audio generated for chapter {i}') - chapter_wav_files.remove(chapter_wav_path) + if post_event: post_event('CORE_CHAPTER_STARTED', chapter_index=chapter.chapter_index) + audio_segments = gen_audio_segments( + pipeline, text, voice, speed, stats, post_event=post_event, max_sentences=max_sentences) + if audio_segments: + final_audio = np.concatenate(audio_segments) + soundfile.write(chapter_wav_path, final_audio, sample_rate) + end_time = time.time() + delta_seconds = end_time - start_time + chars_per_sec = len(text) / delta_seconds + print('Chapter written to', chapter_wav_path) + if post_event: post_event('CORE_CHAPTER_FINISHED', chapter_index=chapter.chapter_index) + print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)') + else: + print(f'Warning: No audio generated for chapter {i}') + chapter_wav_files.remove(chapter_wav_path) if has_ffmpeg: create_index_file(title, creator, chapter_wav_files, output_folder) @@ -160,7 +159,7 @@ def print_selected_chapters(document_chapters, chapters): ], headers=['#', 'Chapter', 'Text Length', 'Selected', 'First words'])) -def gen_audio_segments(pipeline, text, voice, speed, max_sentences=None): +def gen_audio_segments(pipeline, text, voice, speed, stats=None, max_sentences=None, post_event=None): nlp = spacy.load('xx_ent_wiki_sm') nlp.add_pipe('sentencizer') audio_segments = [] @@ -170,6 +169,9 @@ def gen_audio_segments(pipeline, text, voice, speed, max_sentences=None): if max_sentences and i > max_sentences: break for gs, ps, audio in pipeline(sent.text, voice=voice, speed=speed, split_pattern=r'\n\n\n'): audio_segments.append(audio) + if stats: stats.processed_chars += len(sent.text) + if post_event: post_event('CORE_PROGRESS', progress=stats.processed_chars * 100 // stats.total_chars) + print_progress(stats) return audio_segments diff --git a/audiblez/ui.py b/audiblez/ui.py index 49b69cb..e8bd21a 100644 --- a/audiblez/ui.py +++ b/audiblez/ui.py @@ -23,6 +23,8 @@ EVENTS = { 'CORE_FINISHED': NewEvent() } +border = 5 + class MainWindow(wx.Frame): def __init__(self, parent, title): @@ -38,7 +40,7 @@ class MainWindow(wx.Frame): self.create_layout() self.Centre() self.Show(True) - self.open_epub('../epub/mini.epub') + self.open_epub('../epub/lewis.epub') def create_menu(self): menubar = wx.MenuBar() @@ -61,34 +63,38 @@ class MainWindow(wx.Frame): def on_core_started(self, event): print('CORE_STARTED') - self.start_button.Hide() self.progress_bar_label.Show() self.progress_bar.Show() self.progress_bar.SetValue(0) - self.param_panel.Disable() + self.progress_bar.Layout() + self.params_panel.Layout() + self.synth_panel.Layout() for chapter_index, chapter in enumerate(self.document_chapters): if chapter in self.good_chapters: self.set_table_chapter_status(chapter.chapter_index, "Planned") def on_core_chapter_started(self, event): - print('CORE_CHAPTER_STARTED', event.chapter_index) + # print('CORE_CHAPTER_STARTED', event.chapter_index) self.set_table_chapter_status(event.chapter_index, "⏳ In Progress") def on_core_chapter_finished(self, event): - print('CORE_CHAPTER_FINISHED', event.chapter_index) + # print('CORE_CHAPTER_FINISHED', event.chapter_index) self.set_table_chapter_status(event.chapter_index, "✅ Done") self.start_button.Show() def on_core_progress(self, event): - print('CORE_PROGRESS', event.progress) + # print('CORE_PROGRESS', event.progress) self.progress_bar.SetValue(event.progress) + self.progress_bar_label.SetLabel(f"Synthesis Progress: {event.progress}%") + self.synth_panel.Layout() def on_core_finished(self, event): print('CORE_FINISHED', event.progress) + self.open_folder_with_explorer(event.output_folder) def set_table_chapter_status(self, chapter_index, status): - self.table.SetStringItem(chapter_index, 3, status) + self.table.SetItem(chapter_index, 3, status) def create_layout(self): # Panels layout looks like this: @@ -219,7 +225,8 @@ class MainWindow(wx.Frame): self.cover_bitmap.Layout() self.create_book_details_panel() - self.create_param_panel() + self.create_params_panel() + self.create_synthesis_panel() def create_book_details_panel(self): book_details_panel = wx.Panel(self.book_info_panel) @@ -249,22 +256,19 @@ class MainWindow(wx.Frame): book_details_sizer.Add(length_label, pos=(2, 0), flag=wx.ALL, border=5) book_details_sizer.Add(length_text, pos=(2, 1), flag=wx.ALL, border=5) - def create_param_panel(self): - # Add on the bottom right side, 3 dropdowns and a button - self.param_panel_box = wx.Panel(self.right_panel, style=wx.SUNKEN_BORDER) - param_panel_box_sizer = wx.StaticBoxSizer(wx.VERTICAL, self.param_panel_box, "Audiobook Parameters") - self.param_panel_box.SetSizer(param_panel_box_sizer) + def create_params_panel(self): + panel_box = wx.Panel(self.right_panel, style=wx.SUNKEN_BORDER) + panel_box_sizer = wx.StaticBoxSizer(wx.VERTICAL, panel_box, "Audiobook Parameters") + panel_box.SetSizer(panel_box_sizer) - self.param_panel = wx.Panel(self.param_panel_box) - param_panel_box_sizer.Add(self.param_panel, 1, wx.ALL | wx.EXPAND, 5) - self.right_sizer.Add(self.param_panel_box, 1, wx.ALL | wx.EXPAND, 5) - self.param_sizer = wx.GridBagSizer(10, 10) - self.param_panel.SetSizer(self.param_sizer) + panel = self.params_panel = wx.Panel(panel_box) + panel_box_sizer.Add(panel, 1, wx.ALL | wx.EXPAND, 5) + self.right_sizer.Add(panel_box, 1, wx.ALL | wx.EXPAND, 5) + sizer = wx.GridBagSizer(10, 10) + panel.SetSizer(sizer) - border = 5 - - engine_label = wx.StaticText(self.param_panel, label="Engine:") - engine_radio_panel = wx.Panel(self.param_panel) + engine_label = wx.StaticText(panel, label="Engine:") + engine_radio_panel = wx.Panel(panel) cpu_radio = wx.RadioButton(engine_radio_panel, label="CPU", style=wx.RB_GROUP) cuda_radio = wx.RadioButton(engine_radio_panel, label="CUDA") if torch.cuda.is_available(): @@ -272,8 +276,8 @@ class MainWindow(wx.Frame): else: cpu_radio.SetValue(True) cuda_radio.Disable() - self.param_sizer.Add(engine_label, pos=(0, 0), flag=wx.ALL, border=border) - self.param_sizer.Add(engine_radio_panel, pos=(0, 1), flag=wx.ALL, border=border) + sizer.Add(engine_label, pos=(0, 0), flag=wx.ALL, border=border) + sizer.Add(engine_radio_panel, pos=(0, 1), flag=wx.ALL, border=border) engine_radio_panel_sizer = wx.BoxSizer(wx.HORIZONTAL) engine_radio_panel.SetSizer(engine_radio_panel_sizer) engine_radio_panel_sizer.Add(cpu_radio, 0, wx.ALL, 5) @@ -285,48 +289,67 @@ class MainWindow(wx.Frame): for v in l: flag_and_voice_list.append(f'{flags[code]} {v}') - voice_label = wx.StaticText(self.param_panel, label="Voice:") + voice_label = wx.StaticText(panel, label="Voice:") default_voice = flag_and_voice_list[0] self.selected_voice = default_voice - voice_dropdown = wx.ComboBox(self.param_panel, choices=flag_and_voice_list, value=default_voice) + voice_dropdown = wx.ComboBox(panel, choices=flag_and_voice_list, value=default_voice) voice_dropdown.Bind(wx.EVT_COMBOBOX, self.on_select_voice) - self.param_sizer.Add(voice_label, pos=(1, 0), flag=wx.ALL, border=border) - self.param_sizer.Add(voice_dropdown, pos=(1, 1), flag=wx.ALL, border=border) + sizer.Add(voice_label, pos=(1, 0), flag=wx.ALL, border=border) + sizer.Add(voice_dropdown, pos=(1, 1), flag=wx.ALL, border=border) # Add dropdown for speed - speed_label = wx.StaticText(self.param_panel, label="Speed:") - speed_text_input = wx.TextCtrl(self.param_panel, value="1.0") + speed_label = wx.StaticText(panel, label="Speed:") + speed_text_input = wx.TextCtrl(panel, value="1.0") self.selected_speed = '1.0' speed_text_input.Bind(wx.EVT_TEXT, self.on_select_speed) - self.param_sizer.Add(speed_label, pos=(2, 0), flag=wx.ALL, border=border) - self.param_sizer.Add(speed_text_input, pos=(2, 1), flag=wx.ALL, border=border) + sizer.Add(speed_label, pos=(2, 0), flag=wx.ALL, border=border) + sizer.Add(speed_text_input, pos=(2, 1), flag=wx.ALL, border=border) # Add file dialog selector to select output folder - output_folder_label = wx.StaticText(self.param_panel, label="Output Folder:") - self.output_folder_text_ctrl = wx.TextCtrl(self.param_panel, value=os.path.abspath('.')) + output_folder_label = wx.StaticText(panel, label="Output Folder:") + self.output_folder_text_ctrl = wx.TextCtrl(panel, value=os.path.abspath('.')) self.output_folder_text_ctrl.SetEditable(False) # self.output_folder_text_ctrl.SetMinSize((200, -1)) - output_folder_button = wx.Button(self.param_panel, label="📂 Select") + output_folder_button = wx.Button(panel, label="📂 Select") output_folder_button.Bind(wx.EVT_BUTTON, self.open_output_folder_dialog) - self.param_sizer.Add(output_folder_label, pos=(3, 0), flag=wx.ALL, border=border) - self.param_sizer.Add(self.output_folder_text_ctrl, pos=(3, 1), flag=wx.ALL | wx.EXPAND, border=border) - self.param_sizer.Add(output_folder_button, pos=(4, 1), flag=wx.ALL, border=border) + sizer.Add(output_folder_label, pos=(3, 0), flag=wx.ALL, border=border) + sizer.Add(self.output_folder_text_ctrl, pos=(3, 1), flag=wx.ALL | wx.EXPAND, border=border) + sizer.Add(output_folder_button, pos=(4, 1), flag=wx.ALL, border=border) + + return panel + + def create_synthesis_panel(self): + # Think and identify layout issue with the folling code + panel_box = wx.Panel(self.right_panel, style=wx.SUNKEN_BORDER) + panel_box_sizer = wx.StaticBoxSizer(wx.VERTICAL, panel_box, "Audiobook Generation Status") + panel_box.SetSizer(panel_box_sizer) + + panel = self.synth_panel = wx.Panel(panel_box) + panel_box_sizer.Add(panel, 1, wx.ALL | wx.EXPAND, 5) + self.right_sizer.Add(panel_box, 1, wx.ALL | wx.EXPAND, 5) + sizer = wx.BoxSizer(wx.VERTICAL) + panel.SetSizer(sizer) # Add Start button - self.start_button = wx.Button(self.param_panel, label="🚀 Start Audiobook Synthesis") + self.start_button = wx.Button(panel, label="🚀 Start Audiobook Synthesis") self.start_button.Bind(wx.EVT_BUTTON, self.on_start) - self.param_sizer.Add(self.start_button, pos=(6, 0), span=(1, 3), flag=wx.ALL, border=border) + sizer.Add(self.start_button, 0, wx.ALL, 5) + + # Add hidden Stop button + # self.stop_button = wx.Button(panel, label="⏹️ Stop Synthesis") + # self.stop_button.Bind(wx.EVT_BUTTON, self.on_stop) + # sizer.Add(self.stop_button, 0, wx.ALL, 5) + # self.stop_button.Hide() # Add Progress Bar label: - self.progress_bar_label = wx.StaticText(self.param_panel, label="Synthesis Progress:") - self.param_sizer.Add(self.progress_bar_label, pos=(7, 0), flag=wx.ALL, border=border) - self.progress_bar = wx.Gauge(self.param_panel, range=100, style=wx.GA_PROGRESS) # vs GA_HORIZONTAL - self.param_sizer.Add(self.progress_bar, pos=(8, 0), span=(1, 3), flag=wx.ALL | wx.EXPAND, border=border) + self.progress_bar_label = wx.StaticText(panel, label="Synthesis Progress:") + sizer.Add(self.progress_bar_label, 0, wx.ALL, 5) + self.progress_bar = wx.Gauge(panel, range=100, style=wx.GA_PROGRESS) + self.progress_bar.SetMinSize((-1, 30)) + sizer.Add(self.progress_bar, 0, wx.ALL | wx.EXPAND, 5) self.progress_bar_label.Hide() self.progress_bar.Hide() - return self.param_panel - def open_output_folder_dialog(self, event): with wx.DirDialog(self, "Choose a directory:", style=wx.DD_DEFAULT_STYLE) as dialog: if dialog.ShowModal() == wx.ID_CANCEL: @@ -492,6 +515,9 @@ class MainWindow(wx.Frame): voice = self.selected_voice.split(' ')[1] # Remove the flag speed = float(self.selected_speed) selected_chapters = [chapter for chapter in self.document_chapters if chapter.is_selected] + self.start_button.Disable() + self.params_panel.Disable() + # self.stop_button.Show() print('Starting Audiobook Synthesis', dict(file_path=file_path, voice=voice, pick_manually=False, speed=speed)) self.core_thread = CoreThread(params=dict( file_path=file_path, voice=voice, pick_manually=False, speed=speed, @@ -514,6 +540,18 @@ class MainWindow(wx.Frame): def on_exit(self, event): self.Close() + def open_folder_with_explorer(self, folder_path): + try: + import platform + if platform.system() == 'Windows': + subprocess.Popen(['explorer', folder_path]) + elif platform.system() == 'Linux': + subprocess.Popen(['xdg-open', folder_path]) + elif platform.system() == 'Darwin': + subprocess.Popen(['open', folder_path]) + except Exception as e: + print(e) + class CoreThread(threading.Thread): def __init__(self, params): diff --git a/poetry.lock b/poetry.lock index 8c31131..29351b5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2737,21 +2737,6 @@ files = [ [package.extras] widechars = ["wcwidth"] -[[package]] -name = "termcolor" -version = "2.3.0" -description = "ANSI color formatting for output in terminal" -optional = false -python-versions = ">=3.7" -groups = ["main"] -files = [ - {file = "termcolor-2.3.0-py3-none-any.whl", hash = "sha256:3afb05607b89aed0ffe25202399ee0867ad4d3cb4180d98aaf8eefa6a5f7d475"}, - {file = "termcolor-2.3.0.tar.gz", hash = "sha256:b5b08f68937f138fe92f6c089b99f1e2da0ae56c52b78bf7075fd95420fd9a5a"}, -] - -[package.extras] -tests = ["pytest", "pytest-cov"] - [[package]] name = "thinc" version = "8.3.4" @@ -3344,21 +3329,6 @@ files = [ numpy = {version = "*", markers = "python_version >= \"3.0\" and python_version < \"3.12\""} six = "*" -[[package]] -name = "yaspin" -version = "3.1.0" -description = "Yet Another Terminal Spinner" -optional = false -python-versions = "<4.0,>=3.9" -groups = ["main"] -files = [ - {file = "yaspin-3.1.0-py3-none-any.whl", hash = "sha256:5e3d4dfb547d942cae6565718123f1ecfa93e745b7e51871ad2bbae839e71b73"}, - {file = "yaspin-3.1.0.tar.gz", hash = "sha256:7b97c7e257ec598f98cef9878e038bfa619ceb54ac31d61d8ead2b3128f8d7c7"}, -] - -[package.dependencies] -termcolor = ">=2.2.0,<2.4.0" - [[package]] name = "zipp" version = "3.21.0" @@ -3383,4 +3353,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.13" -content-hash = "77214b41b4424395465f8973bd250e4abebf48bf60a205e75cb322939ee3f6be" +content-hash = "f707c0bfe9b0824dcdc3287d3b4aab33ba66ec6ce1eacf15c4da4791572f7746" diff --git a/pyproject.toml b/pyproject.toml index 9f5c8c1..2fa0f01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,6 @@ dependencies = [ "bs4 (>=0.0.2,<0.0.3)", "pydub (>=0.25.1,<0.26.0)", "spacy (>=3.8.3,<4.0.0)", - "yaspin (>=3.1.0,<4.0.0)", "kokoro (>=0.7.9,<0.8.0)", "misaki[zh] (>=0.7.10,<0.8.0)", "wxpython (>=4.2.2,<5.0.0)"