fixed chapters

This commit is contained in:
Claudio Santini 2025-01-15 09:31:50 +01:00
parent 97135bf875
commit 7ca3f76601
2 changed files with 17 additions and 4 deletions

View file

@@ -11,6 +11,7 @@ import subprocess
import soundfile as sf import soundfile as sf
import ebooklib import ebooklib
import warnings import warnings
import re
from pathlib import Path from pathlib import Path
from string import Formatter from string import Formatter
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@@ -83,13 +84,25 @@ def extract_texts(chapters):
return texts return texts
def find_chapters(book, verbose=False): def is_chapter(c):
is_chapter = lambda c: 'chapter' in c.get_name().lower() or 'part' in c.get_name().lower() name = c.get_name().lower()
part = r"part\d{1,3}"
if re.search(part, name):
return True
ch = r"ch\d{1,3}"
if re.search(ch, name):
return True
if 'chapter' in name:
return True
def find_chapters(book, verbose=True):
chapters = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT and is_chapter(c)] chapters = [c for c in book.get_items() if c.get_type() == ebooklib.ITEM_DOCUMENT and is_chapter(c)]
if verbose: if verbose:
for item in book.get_items(): for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT: if item.get_type() == ebooklib.ITEM_DOCUMENT:
print((item.get_name(), len(item.get_body_content()), 'YES' if item in chapters else '-')) # print(f"'{item.get_name()}'" + ', #' + str(len(item.get_body_content())))
print(f'{item.get_name()}'.ljust(60), str(len(item.get_body_content())).ljust(15), 'X' if item in chapters else '-')
return chapters return chapters

View file

@@ -1,6 +1,6 @@
[project] [project]
name = "audiblez" name = "audiblez"
version = "0.1.5" version = "0.1.7"
description = "" description = ""
authors = [ authors = [
{ name = "Claudio Santini", email = "hireclaudio@gmail.com" } { name = "Claudio Santini", email = "hireclaudio@gmail.com" }