refactor checking file extensions

This commit is contained in:
xenofem 2024-02-07 19:18:19 -05:00
parent c24c811115
commit 9c6328659f

View file

@ -329,8 +329,17 @@ def link_ordered_files(ordering, dest, start_index):
link_path = dest / f'{idx:04d}{ext}' link_path = dest / f'{idx:04d}{ext}'
link_path.symlink_to(relpath(src_path, dest)) link_path.symlink_to(relpath(src_path, dest))
def check_extension(path, exts):
    """Tell whether the lowercased suffix of `path` is one of `exts`.

    `path` must expose a string `.suffix` attribute; `exts` is any container
    supporting `in` (this file passes lists such as ['.pdf']). Only the
    path's suffix is lowercased, the entries of `exts` are compared as given.
    """
    lowered_suffix = path.suffix.lower()
    return lowered_suffix in exts
def is_pdf(path):
    """Return True if `path` has a '.pdf' suffix, compared case-insensitively.

    Delegates to check_extension. The result has to be returned explicitly:
    without the `return` the call's value is discarded and the function
    yields None, which is falsy, so every caller filtering or branching on
    is_pdf() would treat all files as non-PDFs.
    """
    return check_extension(path, ['.pdf'])
def is_image(path):
    """Return True if the suffix of `path` is listed in IMAGE_FILE_EXTENSIONS.

    Delegates to check_extension, which lowercases the suffix before the
    membership test. The result has to be returned explicitly: without the
    `return` the function yields None, which is falsy, so every caller
    filtering or branching on is_image() would treat all files as non-images.
    """
    return check_extension(path, IMAGE_FILE_EXTENSIONS)
def ignoreable(path): def ignoreable(path):
return path.name in IGNOREABLE_FILES or path.suffix.lower() in IGNOREABLE_EXTENSIONS return path.name in IGNOREABLE_FILES or check_extension(path, IGNOREABLE_EXTENSIONS)
def ls_ignore(directory): def ls_ignore(directory):
return [ return [
@ -455,7 +464,7 @@ def try_collate_images_vs_pdf(srcs, dest, start_index):
return False return False
outer_pdf = pdfs[0] outer_pdf = pdfs[0]
inner_pdfs = [f for f in descendant_files_ignore(outer_pdf) if f.suffix.lower() == '.pdf'] inner_pdfs = [f for f in descendant_files_ignore(outer_pdf) if is_pdf(f)]
if len(inner_pdfs) != 1: if len(inner_pdfs) != 1:
return False return False
inner_pdf = inner_pdfs[0] inner_pdf = inner_pdfs[0]
@ -465,7 +474,7 @@ def try_collate_images_vs_pdf(srcs, dest, start_index):
non_images = [] non_images = []
descendant_files = [f for src in non_pdf_srcs for f in descendant_files_ignore(src)] descendant_files = [f for src in non_pdf_srcs for f in descendant_files_ignore(src)]
for f in descendant_files: for f in descendant_files:
if f.suffix.lower() in IMAGE_FILE_EXTENSIONS: if is_image(f):
images.append(f) images.append(f)
else: else:
non_images.append(f) non_images.append(f)
@ -495,7 +504,7 @@ def collate_from_paths(srcs, dest, start_index):
if len(srcs) == 1 and srcs[0].is_dir(): if len(srcs) == 1 and srcs[0].is_dir():
return collate_from_paths(ls_ignore(srcs[0]), dest, start_index) return collate_from_paths(ls_ignore(srcs[0]), dest, start_index)
if len(srcs) == 1 and srcs[0].suffix.lower() == '.pdf': if len(srcs) == 1 and is_pdf(srcs[0]):
print(f'Extracting images from {srcs[0]}') print(f'Extracting images from {srcs[0]}')
return link_pdf(srcs[0], dest, start_index) return link_pdf(srcs[0], dest, start_index)
@ -522,7 +531,7 @@ def collate_from_paths(srcs, dest, start_index):
if cover_split != False: if cover_split != False:
return cover_split return cover_split
if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs): if all(src.is_file() and is_image(src) for src in srcs):
ordering = complete_prefix_number_ordering(srcs) ordering = complete_prefix_number_ordering(srcs)
if ordering: if ordering:
print(f'Symlinking image files: {ordering[0]}...') print(f'Symlinking image files: {ordering[0]}...')
@ -558,17 +567,17 @@ def manual_collate(args):
index = 0 index = 0
for path in args.paths: for path in args.paths:
if path.is_dir(): if path.is_dir():
entries = [p for p in path.iterdir() if p.suffix.lower() in IMAGE_FILE_EXTENSIONS] entries = [p for p in path.iterdir() if p.is_file() and is_image(p)]
ordering = complete_prefix_number_ordering(entries) ordering = complete_prefix_number_ordering(entries)
if ordering is None: if ordering is None:
ordering = entries ordering = entries
ordering.sort() ordering.sort()
link_ordered_files(ordering, collation_dir, index) link_ordered_files(ordering, collation_dir, index)
index += len(ordering) index += len(ordering)
elif path.suffix.lower() in IMAGE_FILE_EXTENSIONS: elif is_image(path):
link_ordered_files([path], collation_dir, index) link_ordered_files([path], collation_dir, index)
index += 1 index += 1
elif path.suffix.lower() == ".pdf": elif is_pdf(path):
pdf_page_count = link_pdf(path, collation_dir, index) pdf_page_count = link_pdf(path, collation_dir, index)
if pdf_page_count is None: if pdf_page_count is None:
return return
@ -587,10 +596,10 @@ def analyze(args):
for f in files: for f in files:
print(f'{relpath(f, extract_dir)}', end='') print(f'{relpath(f, extract_dir)}', end='')
if f.suffix.lower() in IMAGE_FILE_EXTENSIONS: if is_image(f):
size = standalone_image_size(f) size = standalone_image_size(f)
print(f'\t{fmt_size(size)}') print(f'\t{fmt_size(size)}')
elif f.suffix.lower() == '.pdf': elif is_pdf(f):
sizes = pdf_image_sizes(f) sizes = pdf_image_sizes(f)
if len(sizes) == 0: if len(sizes) == 0:
print(f'\tContains no images') print(f'\tContains no images')