extract images from epubs

2024-12-08 17:07:23 -05:00 · 2024-12-08 17:07:23 -05:00 · 380a481d9b
commit 380a481d9b
parent 83a836f9b5
1 changed files with 15 additions and 3 deletions
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -97,8 +97,20 @@ ALT_VERSIONS = [
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff', '.bmp']
-IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store', 'desktop.ini']
+IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store', 'desktop.ini', 'mimetype']
-IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm', '.psd', '.mp4']
+IGNOREABLE_EXTENSIONS = [
+    '.txt',
+    '.xml',
+    '.html',
+    '.htm',
+    '.xhtml',
+    '.css',
+    '.js',
+    '.psd',
+    '.mp4',
+    '.opf',
+    '.ncx',
+]
 PDF_CONVERSION_DPI = 300
 PDF_PREVIEW_DPI = 72
@@ -179,7 +191,7 @@ def extract(args):
     any_skipped = False
     for archive_path in args.archives:
-        if archive_path.suffix.lower() == '.zip':
+        if archive_path.suffix.lower() in ['.zip', '.epub']:
             work_id = archive_path.stem
             work_extract_path = args.destdir / 'extract' / work_id