smarter automatic collation when there are multiple versions of pages

This commit is contained in:
xenofem 2024-02-06 10:22:11 -05:00
parent aa039e60f5
commit cb1a1488e2

View file

@ -25,6 +25,7 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
# Matches any ID string that ends with the literal suffix "_FAKKU".
FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
# Matches names flagged as having no text/dialogue (Japanese or English markers).
TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
# Substrings that mark an entry name as belonging to an alternate version of a
# page; names containing none of these are treated as the default ('') version
# by complete_prefix_number_ordering.
ALT_VERSIONS = ['褐色', '日焼け']
# File suffixes listed as image files.
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
@ -196,24 +197,59 @@ def complete_prefix_number_ordering(entries):
if len(entries) == 1: if len(entries) == 1:
return entries return entries
entries_by_version = {}
for entry in entries:
version = next(ver for ver in (ALT_VERSIONS + ['']) if ver in entry.name)
entries_by_version.setdefault(version, []).append(entry)
numberings_by_version = {ver: prefix_numbering(entries_by_version[ver]) for ver in entries_by_version}
unified_indices = set()
for numbering in numberings_by_version.values():
if numbering is None:
return None
unified_indices |= set(numbering.keys())
unified_indices = list(unified_indices)
unified_indices.sort()
versions = list(numberings_by_version.keys())
versions.sort()
version_lengths = {ver: len(numberings_by_version[ver]) for ver in numberings_by_version}
inner_versions = []
outer_versions = [versions[0]]
for ver in versions[1:]:
if version_lengths[ver] >= version_lengths[versions[0]] - 2:
outer_versions.append(ver)
else:
inner_versions.append(ver)
result = []
for out_ver in outer_versions:
for i in unified_indices:
for ver in ([out_ver] + (inner_versions if out_ver == versions[0] else [])):
entries_i_ver = numberings_by_version[ver].get(i, [])
if len(entries_i_ver) <= 1:
result += entries_i_ver
else:
return None
return result
def prefix_numbering(entries):
    """Group entries by the number that follows a prefix shared by all names.

    Candidate split positions are the starts of the NUMBER_REGEX matches in
    the first entry's name, tried from the last match back to the first. The
    first position whose preceding text is a prefix of every entry's name is
    used: each entry's number is read at that position and the entry is filed
    under it.

    Args:
        entries: non-empty sequence of objects exposing a ``name`` string.

    Returns:
        A dict mapping each integer index to the list of entries whose name
        carries that number (several entries may share one index), or None if
        no candidate position is shared by all names, or if some entry has no
        NUMBER_REGEX match at the chosen position.
    """
    first_name = entries[0].name
    # Later matches come first, so the longest usable shared prefix wins.
    for m in reversed(list(NUMBER_REGEX.finditer(first_name))):
        pos = m.start()
        prefix = first_name[:pos]
        if not all(e.name.startswith(prefix) for e in entries):
            continue
        entries_by_index = {}
        for e in entries:
            n = NUMBER_REGEX.match(e.name[pos:])
            if n is None:
                return None
            # Presumably NUMBER_REGEX only matches digit runs, so int() is safe
            # here -- confirm against its definition.
            entries_by_index.setdefault(int(n.group()), []).append(e)
        return entries_by_index
    return None
def link_ordered_files(ordering, dest, start_index=0): def link_ordered_files(ordering, dest, start_index=0):