This commit is contained in:
Claudio Santini 2025-02-09 18:32:55 +01:00
parent 372ff5cbbc
commit db8fabf9ca
5 changed files with 125 additions and 114 deletions

View file

@ -5,7 +5,9 @@
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/audiblez)
![PyPI - Version](https://img.shields.io/pypi/v/audiblez)
### v3 Now with CUDA support and many more languages!
### v4 Now with a graphical interface, CUDA support, and many languages!
![Audiblez GUI on MacOSX](./imgs/mac.png)
Audiblez generates `.m4b` audiobooks from regular `.epub` e-books,
using Kokoro's high-quality speech synthesis.

View file

@ -2,8 +2,6 @@
# audiblez - A program to convert e-books into audiobooks using
# Kokoro-82M model for high-quality text-to-speech synthesis.
# by Claudio Santini 2025 - https://claudio.uk
from io import StringIO
import torch.cuda
import spacy
import ebooklib
@ -13,12 +11,12 @@ import time
import shutil
import subprocess
import re
from io import StringIO
from types import SimpleNamespace
from markdown import Markdown
from tabulate import tabulate
from pathlib import Path
from string import Formatter
from yaspin import yaspin
from bs4 import BeautifulSoup
from kokoro import KPipeline
from ebooklib import epub
@ -34,6 +32,13 @@ def load_spacy():
spacy.cli.download("xx_ent_wiki_sm")
def print_progress(stats):
    """Print the estimated time remaining and the completion percentage.

    Args:
        stats: Object exposing ``total_chars``, ``processed_chars`` and
            ``chars_per_sec`` (``main`` builds it as a ``SimpleNamespace``),
            or ``None`` when the caller is not tracking progress.

    Nothing is printed when ``stats`` is ``None`` or when ``total_chars`` is
    zero, because no meaningful percentage can be computed in either case.
    """
    # gen_audio_segments declares ``stats=None`` and still calls this function,
    # and a zero ``total_chars`` would raise ZeroDivisionError below.
    if stats is None or not stats.total_chars:
        return
    progress = stats.processed_chars * 100 // stats.total_chars
    # Skip the ETA when the throughput is zero/unknown rather than dividing
    # by zero; the percentage is still reported.
    if stats.chars_per_sec:
        remaining_chars = stats.total_chars - stats.processed_chars
        eta = strfdelta(remaining_chars / stats.chars_per_sec)
        print(f'Estimated time remaining: {eta}')
    print('Progress:', f'{progress}%\n')
def main(file_path, voice, pick_manually, speed, output_folder='.',
max_chapters=None, max_sentences=None, selected_chapters=None, post_event=None):
if post_event: post_event('CORE_STARTED')
@ -69,13 +74,15 @@ def main(file_path, voice, pick_manually, speed, output_folder='.',
if not has_ffmpeg:
print('\033[91m' + 'ffmpeg not found. Please install ffmpeg to create mp3 and m4b audiobook files.' + '\033[0m')
total_chars, processed_chars = sum(map(len, texts)), 0
stats = SimpleNamespace(
total_chars=sum(map(len, texts)),
processed_chars=0,
chars_per_sec=500 if torch.cuda.is_available() else 50)
print('Started at:', time.strftime('%H:%M:%S'))
print(f'Total characters: {total_chars:,}')
print(f'Total characters: {stats.total_chars:,}')
print('Total words:', len(' '.join(texts).split()))
chars_per_sec = 500 if torch.cuda.is_available() else 50
eta = strfdelta((total_chars - processed_chars) / chars_per_sec)
print(f'Estimated time remaining (assuming {chars_per_sec} chars/sec): {eta}')
eta = strfdelta((stats.total_chars - stats.processed_chars) / stats.chars_per_sec)
print(f'Estimated time remaining (assuming {stats.chars_per_sec} chars/sec): {eta}')
chapter_wav_files = []
for i, chapter in enumerate(selected_chapters, start=1):
@ -86,10 +93,10 @@ def main(file_path, voice, pick_manually, speed, output_folder='.',
chapter_wav_files.append(chapter_wav_path)
if Path(chapter_wav_path).exists():
print(f'File for chapter {i} already exists. Skipping')
processed_chars += len(text)
stats.processed_chars += len(text)
if post_event:
post_event('CORE_CHAPTER_FINISHED', chapter_index=chapter.chapter_index)
post_event('CORE_PROGRESS', progress=processed_chars * 100 // total_chars)
post_event('CORE_PROGRESS', progress=stats.processed_chars * 100 // stats.total_chars)
continue
if len(text.strip()) < 10:
print(f'Skipping empty chapter {i}')
@ -100,27 +107,19 @@ def main(file_path, voice, pick_manually, speed, output_folder='.',
text = f'{title} {creator}.\n\n' + text
start_time = time.time()
pipeline = KPipeline(lang_code=voice[0]) # a for american or b for british etc.
with yaspin(text=f'Reading chapter {i} ({len(text):,} characters)...', color="yellow") as spinner:
if post_event: post_event('CORE_CHAPTER_STARTED', chapter_index=chapter.chapter_index)
audio_segments = gen_audio_segments(pipeline, text, voice, speed, max_sentences=max_sentences)
audio_segments = gen_audio_segments(
pipeline, text, voice, speed, stats, post_event=post_event, max_sentences=max_sentences)
if audio_segments:
final_audio = np.concatenate(audio_segments)
soundfile.write(chapter_wav_path, final_audio, sample_rate)
end_time = time.time()
delta_seconds = end_time - start_time
chars_per_sec = len(text) / delta_seconds
processed_chars += len(text)
spinner.ok("")
print(f'Estimated time remaining: {strfdelta((total_chars - processed_chars) / chars_per_sec)}')
print('Chapter written to', chapter_wav_path)
if post_event: post_event('CORE_CHAPTER_FINISHED', chapter_index=chapter.chapter_index)
print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)')
progress = processed_chars * 100 // total_chars
print('Progress:', f'{progress}%\n')
if post_event: post_event('CORE_PROGRESS', progress=progress)
else:
spinner.fail("")
print(f'Warning: No audio generated for chapter {i}')
chapter_wav_files.remove(chapter_wav_path)
@ -160,7 +159,7 @@ def print_selected_chapters(document_chapters, chapters):
], headers=['#', 'Chapter', 'Text Length', 'Selected', 'First words']))
def gen_audio_segments(pipeline, text, voice, speed, max_sentences=None):
def gen_audio_segments(pipeline, text, voice, speed, stats=None, max_sentences=None, post_event=None):
nlp = spacy.load('xx_ent_wiki_sm')
nlp.add_pipe('sentencizer')
audio_segments = []
@ -170,6 +169,9 @@ def gen_audio_segments(pipeline, text, voice, speed, max_sentences=None):
if max_sentences and i > max_sentences: break
for gs, ps, audio in pipeline(sent.text, voice=voice, speed=speed, split_pattern=r'\n\n\n'):
audio_segments.append(audio)
if stats: stats.processed_chars += len(sent.text)
if post_event: post_event('CORE_PROGRESS', progress=stats.processed_chars * 100 // stats.total_chars)
print_progress(stats)
return audio_segments

View file

@ -23,6 +23,8 @@ EVENTS = {
'CORE_FINISHED': NewEvent()
}
border = 5
class MainWindow(wx.Frame):
def __init__(self, parent, title):
@ -38,7 +40,7 @@ class MainWindow(wx.Frame):
self.create_layout()
self.Centre()
self.Show(True)
self.open_epub('../epub/mini.epub')
self.open_epub('../epub/lewis.epub')
def create_menu(self):
menubar = wx.MenuBar()
@ -61,34 +63,38 @@ class MainWindow(wx.Frame):
def on_core_started(self, event):
print('CORE_STARTED')
self.start_button.Hide()
self.progress_bar_label.Show()
self.progress_bar.Show()
self.progress_bar.SetValue(0)
self.param_panel.Disable()
self.progress_bar.Layout()
self.params_panel.Layout()
self.synth_panel.Layout()
for chapter_index, chapter in enumerate(self.document_chapters):
if chapter in self.good_chapters:
self.set_table_chapter_status(chapter.chapter_index, "Planned")
def on_core_chapter_started(self, event):
print('CORE_CHAPTER_STARTED', event.chapter_index)
# print('CORE_CHAPTER_STARTED', event.chapter_index)
self.set_table_chapter_status(event.chapter_index, "⏳ In Progress")
def on_core_chapter_finished(self, event):
print('CORE_CHAPTER_FINISHED', event.chapter_index)
# print('CORE_CHAPTER_FINISHED', event.chapter_index)
self.set_table_chapter_status(event.chapter_index, "✅ Done")
self.start_button.Show()
def on_core_progress(self, event):
print('CORE_PROGRESS', event.progress)
# print('CORE_PROGRESS', event.progress)
self.progress_bar.SetValue(event.progress)
self.progress_bar_label.SetLabel(f"Synthesis Progress: {event.progress}%")
self.synth_panel.Layout()
def on_core_finished(self, event):
print('CORE_FINISHED', event.progress)
self.open_folder_with_explorer(event.output_folder)
def set_table_chapter_status(self, chapter_index, status):
self.table.SetStringItem(chapter_index, 3, status)
self.table.SetItem(chapter_index, 3, status)
def create_layout(self):
# Panels layout looks like this:
@ -219,7 +225,8 @@ class MainWindow(wx.Frame):
self.cover_bitmap.Layout()
self.create_book_details_panel()
self.create_param_panel()
self.create_params_panel()
self.create_synthesis_panel()
def create_book_details_panel(self):
book_details_panel = wx.Panel(self.book_info_panel)
@ -249,22 +256,19 @@ class MainWindow(wx.Frame):
book_details_sizer.Add(length_label, pos=(2, 0), flag=wx.ALL, border=5)
book_details_sizer.Add(length_text, pos=(2, 1), flag=wx.ALL, border=5)
def create_param_panel(self):
# Add on the bottom right side, 3 dropdowns and a button
self.param_panel_box = wx.Panel(self.right_panel, style=wx.SUNKEN_BORDER)
param_panel_box_sizer = wx.StaticBoxSizer(wx.VERTICAL, self.param_panel_box, "Audiobook Parameters")
self.param_panel_box.SetSizer(param_panel_box_sizer)
def create_params_panel(self):
panel_box = wx.Panel(self.right_panel, style=wx.SUNKEN_BORDER)
panel_box_sizer = wx.StaticBoxSizer(wx.VERTICAL, panel_box, "Audiobook Parameters")
panel_box.SetSizer(panel_box_sizer)
self.param_panel = wx.Panel(self.param_panel_box)
param_panel_box_sizer.Add(self.param_panel, 1, wx.ALL | wx.EXPAND, 5)
self.right_sizer.Add(self.param_panel_box, 1, wx.ALL | wx.EXPAND, 5)
self.param_sizer = wx.GridBagSizer(10, 10)
self.param_panel.SetSizer(self.param_sizer)
panel = self.params_panel = wx.Panel(panel_box)
panel_box_sizer.Add(panel, 1, wx.ALL | wx.EXPAND, 5)
self.right_sizer.Add(panel_box, 1, wx.ALL | wx.EXPAND, 5)
sizer = wx.GridBagSizer(10, 10)
panel.SetSizer(sizer)
border = 5
engine_label = wx.StaticText(self.param_panel, label="Engine:")
engine_radio_panel = wx.Panel(self.param_panel)
engine_label = wx.StaticText(panel, label="Engine:")
engine_radio_panel = wx.Panel(panel)
cpu_radio = wx.RadioButton(engine_radio_panel, label="CPU", style=wx.RB_GROUP)
cuda_radio = wx.RadioButton(engine_radio_panel, label="CUDA")
if torch.cuda.is_available():
@ -272,8 +276,8 @@ class MainWindow(wx.Frame):
else:
cpu_radio.SetValue(True)
cuda_radio.Disable()
self.param_sizer.Add(engine_label, pos=(0, 0), flag=wx.ALL, border=border)
self.param_sizer.Add(engine_radio_panel, pos=(0, 1), flag=wx.ALL, border=border)
sizer.Add(engine_label, pos=(0, 0), flag=wx.ALL, border=border)
sizer.Add(engine_radio_panel, pos=(0, 1), flag=wx.ALL, border=border)
engine_radio_panel_sizer = wx.BoxSizer(wx.HORIZONTAL)
engine_radio_panel.SetSizer(engine_radio_panel_sizer)
engine_radio_panel_sizer.Add(cpu_radio, 0, wx.ALL, 5)
@ -285,48 +289,67 @@ class MainWindow(wx.Frame):
for v in l:
flag_and_voice_list.append(f'{flags[code]} {v}')
voice_label = wx.StaticText(self.param_panel, label="Voice:")
voice_label = wx.StaticText(panel, label="Voice:")
default_voice = flag_and_voice_list[0]
self.selected_voice = default_voice
voice_dropdown = wx.ComboBox(self.param_panel, choices=flag_and_voice_list, value=default_voice)
voice_dropdown = wx.ComboBox(panel, choices=flag_and_voice_list, value=default_voice)
voice_dropdown.Bind(wx.EVT_COMBOBOX, self.on_select_voice)
self.param_sizer.Add(voice_label, pos=(1, 0), flag=wx.ALL, border=border)
self.param_sizer.Add(voice_dropdown, pos=(1, 1), flag=wx.ALL, border=border)
sizer.Add(voice_label, pos=(1, 0), flag=wx.ALL, border=border)
sizer.Add(voice_dropdown, pos=(1, 1), flag=wx.ALL, border=border)
# Add dropdown for speed
speed_label = wx.StaticText(self.param_panel, label="Speed:")
speed_text_input = wx.TextCtrl(self.param_panel, value="1.0")
speed_label = wx.StaticText(panel, label="Speed:")
speed_text_input = wx.TextCtrl(panel, value="1.0")
self.selected_speed = '1.0'
speed_text_input.Bind(wx.EVT_TEXT, self.on_select_speed)
self.param_sizer.Add(speed_label, pos=(2, 0), flag=wx.ALL, border=border)
self.param_sizer.Add(speed_text_input, pos=(2, 1), flag=wx.ALL, border=border)
sizer.Add(speed_label, pos=(2, 0), flag=wx.ALL, border=border)
sizer.Add(speed_text_input, pos=(2, 1), flag=wx.ALL, border=border)
# Add file dialog selector to select output folder
output_folder_label = wx.StaticText(self.param_panel, label="Output Folder:")
self.output_folder_text_ctrl = wx.TextCtrl(self.param_panel, value=os.path.abspath('.'))
output_folder_label = wx.StaticText(panel, label="Output Folder:")
self.output_folder_text_ctrl = wx.TextCtrl(panel, value=os.path.abspath('.'))
self.output_folder_text_ctrl.SetEditable(False)
# self.output_folder_text_ctrl.SetMinSize((200, -1))
output_folder_button = wx.Button(self.param_panel, label="📂 Select")
output_folder_button = wx.Button(panel, label="📂 Select")
output_folder_button.Bind(wx.EVT_BUTTON, self.open_output_folder_dialog)
self.param_sizer.Add(output_folder_label, pos=(3, 0), flag=wx.ALL, border=border)
self.param_sizer.Add(self.output_folder_text_ctrl, pos=(3, 1), flag=wx.ALL | wx.EXPAND, border=border)
self.param_sizer.Add(output_folder_button, pos=(4, 1), flag=wx.ALL, border=border)
sizer.Add(output_folder_label, pos=(3, 0), flag=wx.ALL, border=border)
sizer.Add(self.output_folder_text_ctrl, pos=(3, 1), flag=wx.ALL | wx.EXPAND, border=border)
sizer.Add(output_folder_button, pos=(4, 1), flag=wx.ALL, border=border)
return panel
def create_synthesis_panel(self):
# Think and identify layout issue with the following code
panel_box = wx.Panel(self.right_panel, style=wx.SUNKEN_BORDER)
panel_box_sizer = wx.StaticBoxSizer(wx.VERTICAL, panel_box, "Audiobook Generation Status")
panel_box.SetSizer(panel_box_sizer)
panel = self.synth_panel = wx.Panel(panel_box)
panel_box_sizer.Add(panel, 1, wx.ALL | wx.EXPAND, 5)
self.right_sizer.Add(panel_box, 1, wx.ALL | wx.EXPAND, 5)
sizer = wx.BoxSizer(wx.VERTICAL)
panel.SetSizer(sizer)
# Add Start button
self.start_button = wx.Button(self.param_panel, label="🚀 Start Audiobook Synthesis")
self.start_button = wx.Button(panel, label="🚀 Start Audiobook Synthesis")
self.start_button.Bind(wx.EVT_BUTTON, self.on_start)
self.param_sizer.Add(self.start_button, pos=(6, 0), span=(1, 3), flag=wx.ALL, border=border)
sizer.Add(self.start_button, 0, wx.ALL, 5)
# Add hidden Stop button
# self.stop_button = wx.Button(panel, label="⏹️ Stop Synthesis")
# self.stop_button.Bind(wx.EVT_BUTTON, self.on_stop)
# sizer.Add(self.stop_button, 0, wx.ALL, 5)
# self.stop_button.Hide()
# Add Progress Bar label:
self.progress_bar_label = wx.StaticText(self.param_panel, label="Synthesis Progress:")
self.param_sizer.Add(self.progress_bar_label, pos=(7, 0), flag=wx.ALL, border=border)
self.progress_bar = wx.Gauge(self.param_panel, range=100, style=wx.GA_PROGRESS) # vs GA_HORIZONTAL
self.param_sizer.Add(self.progress_bar, pos=(8, 0), span=(1, 3), flag=wx.ALL | wx.EXPAND, border=border)
self.progress_bar_label = wx.StaticText(panel, label="Synthesis Progress:")
sizer.Add(self.progress_bar_label, 0, wx.ALL, 5)
self.progress_bar = wx.Gauge(panel, range=100, style=wx.GA_PROGRESS)
self.progress_bar.SetMinSize((-1, 30))
sizer.Add(self.progress_bar, 0, wx.ALL | wx.EXPAND, 5)
self.progress_bar_label.Hide()
self.progress_bar.Hide()
return self.param_panel
def open_output_folder_dialog(self, event):
with wx.DirDialog(self, "Choose a directory:", style=wx.DD_DEFAULT_STYLE) as dialog:
if dialog.ShowModal() == wx.ID_CANCEL:
@ -492,6 +515,9 @@ class MainWindow(wx.Frame):
voice = self.selected_voice.split(' ')[1] # Remove the flag
speed = float(self.selected_speed)
selected_chapters = [chapter for chapter in self.document_chapters if chapter.is_selected]
self.start_button.Disable()
self.params_panel.Disable()
# self.stop_button.Show()
print('Starting Audiobook Synthesis', dict(file_path=file_path, voice=voice, pick_manually=False, speed=speed))
self.core_thread = CoreThread(params=dict(
file_path=file_path, voice=voice, pick_manually=False, speed=speed,
@ -514,6 +540,18 @@ class MainWindow(wx.Frame):
def on_exit(self, event):
self.Close()
def open_folder_with_explorer(self, folder_path):
    """Open ``folder_path`` in the operating system's file browser.

    Best effort: on a platform other than Windows, Linux or macOS nothing
    is launched, and any exception raised while starting the helper
    program is printed instead of propagated.
    """
    try:
        import platform
        # Map platform.system() names to the command that opens a folder.
        launchers = {
            'Windows': 'explorer',
            'Linux': 'xdg-open',
            'Darwin': 'open',
        }
        launcher = launchers.get(platform.system())
        if launcher is not None:
            subprocess.Popen([launcher, folder_path])
    except Exception as e:
        print(e)
class CoreThread(threading.Thread):
def __init__(self, params):

32
poetry.lock generated
View file

@ -2737,21 +2737,6 @@ files = [
[package.extras]
widechars = ["wcwidth"]
[[package]]
name = "termcolor"
version = "2.3.0"
description = "ANSI color formatting for output in terminal"
optional = false
python-versions = ">=3.7"
groups = ["main"]
files = [
{file = "termcolor-2.3.0-py3-none-any.whl", hash = "sha256:3afb05607b89aed0ffe25202399ee0867ad4d3cb4180d98aaf8eefa6a5f7d475"},
{file = "termcolor-2.3.0.tar.gz", hash = "sha256:b5b08f68937f138fe92f6c089b99f1e2da0ae56c52b78bf7075fd95420fd9a5a"},
]
[package.extras]
tests = ["pytest", "pytest-cov"]
[[package]]
name = "thinc"
version = "8.3.4"
@ -3344,21 +3329,6 @@ files = [
numpy = {version = "*", markers = "python_version >= \"3.0\" and python_version < \"3.12\""}
six = "*"
[[package]]
name = "yaspin"
version = "3.1.0"
description = "Yet Another Terminal Spinner"
optional = false
python-versions = "<4.0,>=3.9"
groups = ["main"]
files = [
{file = "yaspin-3.1.0-py3-none-any.whl", hash = "sha256:5e3d4dfb547d942cae6565718123f1ecfa93e745b7e51871ad2bbae839e71b73"},
{file = "yaspin-3.1.0.tar.gz", hash = "sha256:7b97c7e257ec598f98cef9878e038bfa619ceb54ac31d61d8ead2b3128f8d7c7"},
]
[package.dependencies]
termcolor = ">=2.2.0,<2.4.0"
[[package]]
name = "zipp"
version = "3.21.0"
@ -3383,4 +3353,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.9,<3.13"
content-hash = "77214b41b4424395465f8973bd250e4abebf48bf60a205e75cb322939ee3f6be"
content-hash = "f707c0bfe9b0824dcdc3287d3b4aab33ba66ec6ce1eacf15c4da4791572f7746"

View file

@ -14,7 +14,6 @@ dependencies = [
"bs4 (>=0.0.2,<0.0.3)",
"pydub (>=0.25.1,<0.26.0)",
"spacy (>=3.8.3,<4.0.0)",
"yaspin (>=3.1.0,<4.0.0)",
"kokoro (>=0.7.9,<0.8.0)",
"misaki[zh] (>=0.7.10,<0.8.0)",
"wxpython (>=4.2.2,<5.0.0)"