mirror of
https://github.com/santinic/audiblez.git
synced 2025-08-05 16:48:55 +00:00
fix: improve audio processing by handling large text chunks and ensuring valid audio generation
This commit is contained in:
parent
acea1ac31c
commit
ec2e678c51
1 changed file with 35 additions and 14 deletions
49
audiblez.py
49
audiblez.py
|
@ -7,6 +7,8 @@ import sys
|
|||
import time
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
import numpy as np
|
||||
import soundfile
|
||||
import ebooklib
|
||||
import warnings
|
||||
|
@ -63,7 +65,7 @@ def main(pipeline, file_path, voice, pick_manually, speed):
|
|||
print('Started at:', time.strftime('%H:%M:%S'))
|
||||
print(f'Total characters: {total_chars:,}')
|
||||
print('Total words:', len(' '.join(texts).split()))
|
||||
chars_per_sec = 500 if torch.cuda.is_available() else 50 # assume 50 or 500 chars per second at the beginning
|
||||
chars_per_sec = 500 if torch.cuda.is_available() else 50
|
||||
print(f'Estimated time remaining (assuming {chars_per_sec} chars/sec): {strfdelta((total_chars - processed_chars) / chars_per_sec)}')
|
||||
|
||||
chapter_mp3_files = []
|
||||
|
@ -81,24 +83,43 @@ def main(pipeline, file_path, voice, pick_manually, speed):
|
|||
if i == 1:
|
||||
text = intro + '.\n\n' + text
|
||||
start_time = time.time()
|
||||
generator = pipeline(text, voice=voice, speed=speed)
|
||||
for gs, ps, audio in generator:
|
||||
soundfile.write(chapter_filename, audio, sample_rate)
|
||||
end_time = time.time()
|
||||
delta_seconds = end_time - start_time
|
||||
chars_per_sec = len(text) / delta_seconds
|
||||
processed_chars += len(text)
|
||||
print(f'Estimated time remaining: {strfdelta((total_chars - processed_chars) / chars_per_sec)}')
|
||||
print('Chapter written to', chapter_filename)
|
||||
print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)')
|
||||
progress = processed_chars * 100 // total_chars
|
||||
print('Progress:', f'{progress}%\n')
|
||||
audio_segments = []
|
||||
chunk_size = 5000 # Adjust chunk size as needed
|
||||
|
||||
# Feed the chapter text to the pipeline in slices of at most chunk_size characters
|
||||
remaining_text = text
|
||||
while remaining_text:
|
||||
chunk = remaining_text[:chunk_size]
|
||||
remaining_text = remaining_text[chunk_size:]
|
||||
|
||||
# Process the chunk
|
||||
chunk_segments = []
|
||||
for gs, ps, audio in pipeline(chunk, voice=voice, speed=speed):
|
||||
chunk_segments.append(audio)
|
||||
|
||||
if chunk_segments: # Only append if we got valid audio segments
|
||||
audio_segments.extend(chunk_segments)
|
||||
|
||||
if audio_segments: # Only concatenate if we have segments
|
||||
final_audio = np.concatenate(audio_segments)
|
||||
soundfile.write(chapter_filename, final_audio, sample_rate)
|
||||
end_time = time.time()
|
||||
delta_seconds = end_time - start_time
|
||||
chars_per_sec = len(text) / delta_seconds
|
||||
processed_chars += len(text)
|
||||
print(f'Estimated time remaining: {strfdelta((total_chars - processed_chars) / chars_per_sec)}')
|
||||
print('Chapter written to', chapter_filename)
|
||||
print(f'Chapter {i} read in {delta_seconds:.2f} seconds ({chars_per_sec:.0f} characters per second)')
|
||||
progress = processed_chars * 100 // total_chars
|
||||
print('Progress:', f'{progress}%\n')
|
||||
else:
|
||||
print(f'Warning: No audio generated for chapter {i}')
|
||||
chapter_mp3_files.remove(chapter_filename)
|
||||
|
||||
if has_ffmpeg:
|
||||
create_index_file(title, by_creator, chapter_mp3_files)
|
||||
create_m4b(chapter_mp3_files, filename, cover_image)
|
||||
|
||||
|
||||
def extract_texts(chapters):
|
||||
texts = []
|
||||
for chapter in chapters:
|
||||
|
|
Loading…
Add table
Reference in a new issue