don't get tripped up as much by random extra files during collation

This commit is contained in:
xenofem 2024-01-22 07:56:45 -05:00
parent 7596ed49be
commit 2ab99c9a4b

View file

@ -19,6 +19,9 @@ NUMBER_REGEX = re.compile('[0-9]+')
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff'] IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
# Directory entry names that ls_ignore() drops from listings. Matching is an
# exact, case-sensitive comparison against path.name, so it applies to files
# and directories alike.
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
# Suffixes that ls_ignore() drops from listings. They are compared against
# path.suffix.lower(), so each entry must be lowercase and include the
# leading dot.
IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm']
def open_zipfile_with_encoding(path): def open_zipfile_with_encoding(path):
try: try:
return zipfile.ZipFile(path, metadata_encoding="utf-8") return zipfile.ZipFile(path, metadata_encoding="utf-8")
@ -151,6 +154,12 @@ def link_ordered_files(ordering, dest, start_index=0):
link_path = dest / f'{idx:04d}{ext}' link_path = dest / f'{idx:04d}{ext}'
link_path.symlink_to(relpath(src_path, dest)) link_path.symlink_to(relpath(src_path, dest))
def ls_ignore(directory):
    """Return the entries of *directory* with ignorable clutter removed.

    An entry is skipped when its name is listed in IGNOREABLE_FILES, or when
    its suffix (lowercased) is listed in IGNOREABLE_EXTENSIONS. Everything
    else is returned as a list, in the order ``directory.iterdir()`` yields it.
    """
    kept = []
    for entry in directory.iterdir():
        # Name check comes first; the suffix is only inspected for entries
        # whose name is not already on the ignore list.
        if entry.name in IGNOREABLE_FILES:
            continue
        if entry.suffix.lower() in IGNOREABLE_EXTENSIONS:
            continue
        kept.append(entry)
    return kept
def collate(args): def collate(args):
con = sqlite3.connect(args.destdir / 'meta.db') con = sqlite3.connect(args.destdir / 'meta.db')
cur = con.cursor() cur = con.cursor()
@ -171,11 +180,11 @@ def collate(args):
if work_id in hint_map: if work_id in hint_map:
hint = hint_map[work_id] hint = hint_map[work_id]
entries = list(hint.iterdir()) if hint.is_dir() else [hint] entries = ls_ignore(hint) if hint.is_dir() else [hint]
else: else:
search_dir = work_path search_dir = work_path
while True: while True:
entries = list(search_dir.iterdir()) entries = ls_ignore(search_dir)
if len(entries) == 1 and entries[0].is_dir(): if len(entries) == 1 and entries[0].is_dir():
search_dir = entries[0] search_dir = entries[0]
else: else: