From 380a481d9be01184cf0dbbe620bbc55ff68f1597 Mon Sep 17 00:00:00 2001 From: xenofem Date: Sun, 8 Dec 2024 17:07:23 -0500 Subject: [PATCH] extract images from epubs --- dlibrary/dlibrary.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index 7b97233..8868f83 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -97,8 +97,20 @@ ALT_VERSIONS = [ IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff', '.bmp'] -IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store', 'desktop.ini'] -IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm', '.psd', '.mp4'] +IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store', 'desktop.ini', 'mimetype'] +IGNOREABLE_EXTENSIONS = [ + '.txt', + '.xml', + '.html', + '.htm', + '.xhtml', + '.css', + '.js', + '.psd', + '.mp4', + '.opf', + '.ncx', +] PDF_CONVERSION_DPI = 300 PDF_PREVIEW_DPI = 72 @@ -179,7 +191,7 @@ def extract(args): any_skipped = False for archive_path in args.archives: - if archive_path.suffix.lower() == '.zip': + if archive_path.suffix.lower() in ['.zip', '.epub']: work_id = archive_path.stem work_extract_path = args.destdir / 'extract' / work_id