Adds generation of chapter markings in the audiobook

This commit is contained in:
Marco Dondero 2025-01-16 07:52:13 +01:00
parent 75cb3d1a64
commit 14223ea184

View file

@ -46,6 +46,7 @@ def main(kokoro, file_path, lang, voice, pick_manually):
i = 1 i = 1
chapter_mp3_files = [] chapter_mp3_files = []
durations = {}
for text in texts: for text in texts:
if len(text) == 0: if len(text) == 0:
continue continue
@ -61,6 +62,7 @@ def main(kokoro, file_path, lang, voice, pick_manually):
start_time = time.time() start_time = time.time()
samples, sample_rate = kokoro.create(text, voice=voice, speed=1.0, lang=lang) samples, sample_rate = kokoro.create(text, voice=voice, speed=1.0, lang=lang)
sf.write(f'{chapter_filename}', samples, sample_rate) sf.write(f'{chapter_filename}', samples, sample_rate)
durations[chapter_filename] = len(samples)/sample_rate
end_time = time.time() end_time = time.time()
delta_seconds = end_time - start_time delta_seconds = end_time - start_time
chars_per_sec = len(text) / delta_seconds chars_per_sec = len(text) / delta_seconds
@ -73,6 +75,7 @@ def main(kokoro, file_path, lang, voice, pick_manually):
print('Progress:', f'{progress}%') print('Progress:', f'{progress}%')
i += 1 i += 1
if has_ffmpeg: if has_ffmpeg:
create_index_file(title, creator, chapter_mp3_files, durations)
create_m4b(chapter_mp3_files, filename) create_m4b(chapter_mp3_files, filename)
@ -138,23 +141,41 @@ def strfdelta(tdelta, fmt='{D:02}d {H:02}h {M:02}m {S:02}s'):
return f.format(fmt, **values) return f.format(fmt, **values)
def create_m4b(chaptfer_files, filename): def create_m4b(chapter_files, filename):
tmp_filename = filename.replace('.epub', '.tmp.m4a') tmp_filename = filename.replace('.epub', '.tmp.m4a')
if not Path(tmp_filename).exists(): if not Path(tmp_filename).exists():
combined_audio = AudioSegment.empty() combined_audio = AudioSegment.empty()
for wav_file in chaptfer_files: for wav_file in chapter_files:
audio = AudioSegment.from_wav(wav_file) audio = AudioSegment.from_wav(wav_file)
combined_audio += audio combined_audio += audio
print('Converting to Mp4...') print('Converting to Mp4...')
combined_audio.export(tmp_filename, format="mp4", codec="aac", bitrate="64k") combined_audio.export(tmp_filename, format="mp4", codec="aac", bitrate="64k")
final_filename = filename.replace('.epub', '.m4b') final_filename = filename.replace('.epub', '.m4b')
print('Creating M4B file...') print('Creating M4B file...')
proc = subprocess.run(['ffmpeg', '-i', f'{tmp_filename}', '-c', 'copy', '-f', 'mp4', f'{final_filename}']) proc = subprocess.run(['ffmpeg', '-i', f'{tmp_filename}', '-i', 'chapters.txt', '-map', '0', '-map_metadata', '1', '-c', 'copy', '-f', 'mp4', f'{final_filename}'])
Path(tmp_filename).unlink() Path(tmp_filename).unlink()
if proc.returncode == 0: if proc.returncode == 0:
print(f'{final_filename} created. Enjoy your audiobook.') print(f'{final_filename} created. Enjoy your audiobook.')
print('Feel free to delete the intermediary .wav chapter files, the .m4b is all you need.') print('Feel free to delete the intermediary .wav chapter files, the .m4b is all you need.')
def probe_duration(file_name):
args = ['ffprobe', '-i', file_name, '-show_entries', 'format=duration', '-v', 'quiet', '-of', 'default=noprint_wrappers=1:nokey=1']
proc = subprocess.run(args, capture_output=True, text=True, check=True)
return float(proc.stdout.strip())
def create_index_file(title, creator, chapter_mp3_files, durations):
with open("chapters.txt", "w") as f:
f.write(f";FFMETADATA1\ntitle={title}\nartist={creator}\n\n")
start = 0
i = 0
for c in chapter_mp3_files:
if c not in durations:
durations[c] = probe_duration(c)
end = start + (int)(durations[c] * 1000)
f.write(f"[CHAPTER]\nTIMEBASE=1/1000\nSTART={start}\nEND={end}\ntitle=Chapter {i}\n\n")
i += 1
start = end
def cli_main(): def cli_main():
if not Path('kokoro-v0_19.onnx').exists() or not Path('voices.json').exists(): if not Path('kokoro-v0_19.onnx').exists() or not Path('voices.json').exists():
@ -172,7 +193,7 @@ def cli_main():
parser.add_argument('epub_file_path', help='Path to the epub file') parser.add_argument('epub_file_path', help='Path to the epub file')
parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn') parser.add_argument('-l', '--lang', default='en-gb', help='Language code: en-gb, en-us, fr-fr, ja, ko, cmn')
parser.add_argument('-v', '--voice', default=default_voice, help=f'Choose narrating voice: {voices_str}') parser.add_argument('-v', '--voice', default=default_voice, help=f'Choose narrating voice: {voices_str}')
parser.add_argument('-p', '--pick', default=False, help=f'Manually select which chapters to read in the audiobook', parser.add_argument('-p', '--pick', default=False, help='Manually select which chapters to read in the audiobook',
action='store_true') action='store_true')
if len(sys.argv) == 1: if len(sys.argv) == 1:
parser.print_help(sys.stderr) parser.print_help(sys.stderr)