This commit is contained in:
Claudio Santini 2025-01-14 18:38:26 +01:00
parent 4af031eade
commit b616003452
3 changed files with 58 additions and 16 deletions

View file

@ -17,6 +17,7 @@ from string import Formatter
from bs4 import BeautifulSoup
from kokoro_onnx import Kokoro
from ebooklib import epub
from pydub import AudioSegment
kokoro = Kokoro('kokoro-v0_19.onnx', 'voices.json')
@ -41,8 +42,12 @@ def main(file_path, lang, voice):
print('Total words:', len(' '.join(texts).split(' ')))
i = 1
chapter_mp3_files = []
for text in texts:
if i > 8:
continue
chapter_filename = filename.replace('.epub', f'_chapter_{i}.wav')
chapter_mp3_files.append(chapter_filename)
if Path(chapter_filename).exists() or Path(chapter_filename.replace('.wav', '.mp3')).exists():
print(f'File for chapter {i} already exists. Skipping')
i += 1
@ -61,12 +66,15 @@ def main(file_path, lang, voice):
remaining_chars = sum([len(t) for t in texts[i - 1:]])
remaining_time = remaining_chars / chars_per_sec
print(f'Estimated time remaining: {strfdelta(remaining_time)}')
progress = int((total_chars - remaining_chars) / total_chars * 100)
print(f'Progress: {progress}%')
print()
if has_ffmpeg:
convert_to_mp3(chapter_filename)
# if has_ffmpeg:
# convert_to_mp3(chapter_filename)
i += 1
if has_ffmpeg:
create_m4b(filename)
create_m4b(chapter_mp3_files, filename)
def extract_texts(chapters):
@ -107,19 +115,41 @@ def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
return f.format(fmt, **values)
def convert_to_mp3(wav_file):
mp3_file = wav_file.replace('.wav', '.mp3')
print(f'In parallel, converting {wav_file} to {mp3_file}...')
subprocess.Popen(['ffmpeg', '-i', wav_file, mp3_file, '&& rm', wav_file, '&& echo "mp3 convertion done."'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# def convert_to_mp3(wav_file):
# mp3_file = wav_file.replace('.wav', '.mp3')
# print(f'In parallel, converting {wav_file} to {mp3_file}...')
# subprocess.Popen(['ffmpeg', '-i', wav_file, mp3_file, '&& rm', wav_file, '&& echo "mp3 convertion done."'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
def create_m4b(filename):
if shutil.which('ffmpeg') is None:
return
# def create_m4b(chapter_files, filename):
# if shutil.which('ffmpeg') is None:
# return
# print('Creating M4B file...')
# filename_m4b = filename.replace('.epub', '.m4b')
# concat_str = '|'.join(chapter_files)
# cmd = ['ffmpeg', '-i', f'concat:{concat_str}', '-c:a', 'aac', '-b:a', '64k', '-f', 'mp4', f'{filename_m4b}']
# print(cmd)
# proc = subprocess.run(cmd)
# if proc.returncode == 0:
# print(f'{filename_m4b} created. Enjoy your audiobook.')
def create_m4b(chaptfer_files, filename):
tmp_filename = filename.replace('.epub', '.tmp.m4a')
if not Path(tmp_filename).exists():
combined_audio = AudioSegment.empty()
for wav_file in chaptfer_files:
audio = AudioSegment.from_wav(wav_file)
combined_audio += audio
print('Creating M4A file...')
combined_audio.export(tmp_filename, format="mp4", codec="aac", bitrate="64k")
final_filename = filename.replace('.epub', '.m4b')
print('Creating M4B file...')
filename_m4b = filename.replace('.epub', '.m4b')
subprocess.run(['ffmpeg', '-i', 'concat:chapter_*.mp3', '-acodec', 'copy', f'{filename_m4b}'])
print(f'{filename_m4b} created. Enjoy your audbiook.')
proc = subprocess.run(['ffmpeg', '-i', f'{tmp_filename}', '-c', 'copy', '-f', 'mp4', f'{final_filename}'])
Path(tmp_filename).unlink()
if proc.returncode == 0:
print(f'{final_filename} created. Enjoy your audiobook.')
print('Feel free to delete the intermediary .wav chapter files, the .m4b is all you need.')
if __name__ == '__main__':
@ -132,7 +162,6 @@ if __name__ == '__main__':
parser.add_argument('epub_file_path', help='Path to the epub file')
parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
parser.add_argument('-v', '--voice', default=default_voice, help=f'Choose narrating voice: {voices_str}')
parser.add_argument('-w', '--wav', help="Output to .wav files (instead of mp3, which the default)", action='store_true')
if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)

14
poetry.lock generated
View file

@ -1249,6 +1249,18 @@ files = [
{file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
]
[[package]]
name = "pydub"
version = "0.25.1"
description = "Manipulate audio with an simple and easy high level interface"
optional = false
python-versions = "*"
groups = ["main"]
files = [
{file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"},
{file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"},
]
[[package]]
name = "pylatexenc"
version = "2.10"
@ -1906,4 +1918,4 @@ zstd = ["zstandard (>=0.18.0)"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.11,<3.13"
content-hash = "73ab66a24f6b11201dee9f0ab923ef5b06568f9d850ff05cf17508e6c829d3d4"
content-hash = "dcb42f7e6478f1428516c4687c5fca4a4b8eb2f16bd751c056fb16f3b03bdd40"

View file

@ -73,7 +73,8 @@ dependencies = [
"threadpoolctl (==3.5.0)",
"typing-extensions (==4.12.2)",
"uritemplate (==4.1.1)",
"urllib3 (==2.3.0)"
"urllib3 (==2.3.0)",
"pydub (>=0.25.1,<0.26.0)"
]