From cb1a1488e205e94fb0d6479891e18b50aa40e72a Mon Sep 17 00:00:00 2001 From: xenofem Date: Tue, 6 Feb 2024 10:22:11 -0500 Subject: [PATCH] smarter automatic collation when there are multiple versions of pages --- dlibrary/dlibrary.py | 52 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index d345974..c4dbfba 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -25,6 +25,7 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$') FAKKU_ID_REGEX = re.compile('.*_FAKKU$') TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless') +ALT_VERSIONS = ['褐色', '日焼け'] IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff'] @@ -196,24 +197,59 @@ def complete_prefix_number_ordering(entries): if len(entries) == 1: return entries + entries_by_version = {} + for entry in entries: + version = next(ver for ver in (ALT_VERSIONS + ['']) if ver in entry.name) + entries_by_version.setdefault(version, []).append(entry) + + numberings_by_version = {ver: prefix_numbering(entries_by_version[ver]) for ver in entries_by_version} + + unified_indices = set() + for numbering in numberings_by_version.values(): + if numbering is None: + return None + unified_indices |= set(numbering.keys()) + unified_indices = list(unified_indices) + unified_indices.sort() + + versions = list(numberings_by_version.keys()) + versions.sort() + + version_lengths = {ver: len(numberings_by_version[ver]) for ver in numberings_by_version} + inner_versions = [] + outer_versions = [versions[0]] + for ver in versions[1:]: + if version_lengths[ver] >= version_lengths[versions[0]] - 2: + outer_versions.append(ver) + else: + inner_versions.append(ver) + + result = [] + for out_ver in outer_versions: + for i in unified_indices: + for ver in ([out_ver] + (inner_versions if out_ver == versions[0] else [])): + entries_i_ver = numberings_by_version[ver].get(i, []) + if len(entries_i_ver) <= 1: + result += entries_i_ver + else: + return None + return result + +def prefix_numbering(entries): matches = reversed(list(NUMBER_REGEX.finditer(entries[0].name))) for m in matches: pos = m.start() prefix = entries[0].name[:pos] if all(e.name.startswith(prefix) for e in entries): - entries_with_indices = [] - indices = set() + entries_by_index = {} for e in entries: n = NUMBER_REGEX.match(e.name[pos:]) if n is None: return None i = int(n.group()) - if i in indices: - return None - indices.add(i) - entries_with_indices.append((e, i)) - entries_with_indices.sort(key=lambda ei: ei[1]) - return [e for (e, i) in entries_with_indices] + entries_by_index.setdefault(i, []).append(e) + return entries_by_index + return None def link_ordered_files(ordering, dest, start_index=0):