extract images from epubs

This commit is contained in:
xenofem 2024-12-08 17:07:23 -05:00
parent 83a836f9b5
commit 380a481d9b

View file

@@ -97,8 +97,20 @@ ALT_VERSIONS = [
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff', '.bmp']
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store', 'desktop.ini']
IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm', '.psd', '.mp4']
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store', 'desktop.ini', 'mimetype']
IGNOREABLE_EXTENSIONS = [
'.txt',
'.xml',
'.html',
'.htm',
'.xhtml',
'.css',
'.js',
'.psd',
'.mp4',
'.opf',
'.ncx',
]
PDF_CONVERSION_DPI = 300
PDF_PREVIEW_DPI = 72
@@ -179,7 +191,7 @@ def extract(args):
any_skipped = False
for archive_path in args.archives:
if archive_path.suffix.lower() == '.zip':
if archive_path.suffix.lower() in ['.zip', '.epub']:
work_id = archive_path.stem
work_extract_path = args.destdir / 'extract' / work_id