smarter automatic collation of textless versions

This commit is contained in:
xenofem 2024-02-06 09:26:39 -05:00
parent 25374a5ba3
commit 83d99f4585

View file

@@ -24,6 +24,8 @@ DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
FANZA_ID_REGEX = re.compile('^d_[0-9]+$') FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
FAKKU_ID_REGEX = re.compile('.*_FAKKU$') FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff'] IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store'] IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
@@ -191,6 +193,9 @@ def link_pdf(src, dest, start_index=0):
f.write(image["image"]) f.write(image["image"])
def complete_prefix_number_ordering(entries): def complete_prefix_number_ordering(entries):
if len(entries) == 1:
return entries
matches = reversed(list(NUMBER_REGEX.finditer(entries[0].name))) matches = reversed(list(NUMBER_REGEX.finditer(entries[0].name)))
for m in matches: for m in matches:
pos = m.start() pos = m.start()
@@ -266,6 +271,19 @@ def collate(args):
if all(entry.is_file() and entry.suffix.lower() in IMAGE_FILE_EXTENSIONS for entry in entries): if all(entry.is_file() and entry.suffix.lower() in IMAGE_FILE_EXTENSIONS for entry in entries):
ordering = complete_prefix_number_ordering(entries) ordering = complete_prefix_number_ordering(entries)
if not ordering:
with_text = []
textless = []
for entry in entries:
if TEXTLESS_REGEX.search(entry.name):
textless.append(entry)
else:
with_text.append(entry)
if with_text and textless:
with_text_ordering = complete_prefix_number_ordering(with_text)
textless_ordering = complete_prefix_number_ordering(textless)
if with_text_ordering and textless_ordering:
ordering = with_text_ordering + textless_ordering
if ordering: if ordering:
print(f'Symlinking image files for {work_id}') print(f'Symlinking image files for {work_id}')
link_ordered_files(ordering, collation_dir) link_ordered_files(ordering, collation_dir)