From ef0c115f0ff997c7e296ad3ec749688e394911e5 Mon Sep 17 00:00:00 2001 From: xenofem Date: Tue, 6 Feb 2024 12:29:32 -0500 Subject: [PATCH 1/5] add more alt versions --- dlibrary/dlibrary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index deaf3ee..f49bee6 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -25,7 +25,7 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$') FAKKU_ID_REGEX = re.compile('.*_FAKKU$') TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless') -ALT_VERSIONS = ['褐色', '日焼け', 'pink'] +ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌'] IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff'] From 61a7eb07f90ab4a0674fbd9e562d8dcbdc0c2de0 Mon Sep 17 00:00:00 2001 From: xenofem Date: Tue, 6 Feb 2024 12:48:00 -0500 Subject: [PATCH 2/5] detect epilogues in auto collation --- dlibrary/dlibrary.py | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index f49bee6..ef8bbb8 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -25,6 +25,7 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$') FAKKU_ID_REGEX = re.compile('.*_FAKKU$') TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless') +EPILOGUE_REGEX = re.compile('after|後日談') ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌'] IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff'] @@ -345,6 +346,26 @@ def collate(args): collation_staging_area.rmdir() con.close() +def collate_regex_later(srcs, dest, regex, start_index): + matching = [] + nonmatching = [] + for src in srcs: + if regex.search(src.name): + matching.append(src) + else: + nonmatching.append(src) + + if not (matching and nonmatching): + return False + + nonmatching_pages = collate_from_paths(nonmatching, dest, start_index) + if nonmatching_pages is None: + return None + matching_pages = collate_from_paths(matching, dest, start_index+nonmatching_pages) + if matching_pages is None: + return None + return nonmatching_pages + matching_pages + def collate_from_paths(srcs, dest, start_index): if len(srcs) == 1 and srcs[0].is_dir(): return collate_from_paths(ls_ignore(srcs[0]), dest, start_index) @@ -356,21 +377,13 @@ def collate_from_paths(srcs, dest, start_index): if len(srcs) == 0: return 0 - with_text = [] - textless = [] - for src in srcs: - if TEXTLESS_REGEX.search(src.name): - textless.append(src) - else: - with_text.append(src) - if with_text and textless: - text_pages = collate_from_paths(with_text, dest, start_index) - if text_pages is None: - return None - textless_pages = collate_from_paths(textless, dest, start_index+text_pages) - if textless_pages is None: - return None - return text_pages + textless_pages + textless_split = collate_regex_later(srcs, dest, TEXTLESS_REGEX, start_index) + if textless_split != False: + return textless_split + + epilogue_split = collate_regex_later(srcs, dest, EPILOGUE_REGEX, start_index) + if epilogue_split != False: + return epilogue_split if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs): ordering = complete_prefix_number_ordering(srcs) From 85ac6bc35c145015c09f8c8846d567b8213a9f38 Mon Sep 17 00:00:00 2001 From: xenofem Date: Tue, 6 Feb 2024 13:00:44 -0500 Subject: [PATCH 3/5] handle cases where the first item isn't numbered --- dlibrary/dlibrary.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index ef8bbb8..4e2eea6 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -245,26 +245,31 @@ def complete_prefix_number_ordering(entries): return result def unique_hierarchical_prefix_numbering(entries, start_point=0): - matches = reversed(list(NUMBER_REGEX.finditer(entries[0].name))) + longest_entry = max(entries, key=lambda e: len(e.name)) + matches = reversed(list(NUMBER_REGEX.finditer(longest_entry.name))) for m in matches: pos = m.start() if pos < start_point: return None - prefix = entries[0].name[:pos] - if all(e.name.startswith(prefix) for e in entries): + prefix = longest_entry.name[:pos] + if all(e.name.startswith(prefix) or prefix.startswith(e.stem) for e in entries): numbering = {} for e in entries: - n = NUMBER_REGEX.match(e.name[pos:]) - if n is None: - return None - i = int(n.group()) + if pos >= len(e.stem): + i = 0 + else: + n = NUMBER_REGEX.match(e.name[pos:]) + if n is None: + return None + i = int(n.group()) numbering.setdefault((i,), []).append(e) indices = list(numbering.keys()) for idx in indices: if len(numbering[idx]) > 1: ents_idx = numbering.pop(idx) - next_layer_start = pos + NUMBER_REGEX.match(ents_idx[0].name[pos:]).end() + longest = max(ents_idx, key=lambda e: len(e.name)) + next_layer_start = pos + NUMBER_REGEX.match(longest.name[pos:]).end() sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start) or alphabetic_numbering(ents_idx, next_layer_start) if not sub_numbering: return None From 26af5286dcf06f1a0eb2d771097fce614de37758 Mon Sep 17 00:00:00 2001 From: xenofem Date: Tue, 6 Feb 2024 13:20:56 -0500 Subject: [PATCH 4/5] handle combinations of versions, and cases where things have versions but not numbers --- dlibrary/dlibrary.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index 4e2eea6..8dee4bd 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -26,7 +26,7 @@ FAKKU_ID_REGEX = re.compile('.*_FAKKU$') TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless') EPILOGUE_REGEX = re.compile('after|後日談') -ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌'] +ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌', 'うつろ目'] IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff'] @@ -202,8 +202,11 @@ def complete_prefix_number_ordering(entries): entries_by_version = {} for entry in entries: - version = next(ver for ver in (ALT_VERSIONS + ['']) if ver in entry.name) - entries_by_version.setdefault(version, []).append(entry) + version_code = 0 + for (i, version) in enumerate(ALT_VERSIONS): + if version in entry.name: + version_code |= (1 << i) + entries_by_version.setdefault(version_code, []).append(entry) numberings_by_version = {ver: unique_hierarchical_prefix_numbering(entries_by_version[ver]) for ver in entries_by_version} @@ -212,6 +215,7 @@ def complete_prefix_number_ordering(entries): if numbering is None: return None unified_indices |= set(numbering.keys()) + unified_indices.discard(None) unified_indices = list(unified_indices) unified_indices.sort() @@ -225,6 +229,8 @@ def complete_prefix_number_ordering(entries): return None break + unified_indices.append(None) + versions = list(numberings_by_version.keys()) versions.sort() @@ -245,6 +251,9 @@ def complete_prefix_number_ordering(entries): return result def unique_hierarchical_prefix_numbering(entries, start_point=0): + if len(entries) == 1 and not NUMBER_REGEX.search(entries[0].name): + return {None: entries} + longest_entry = max(entries, key=lambda e: len(e.name)) matches = reversed(list(NUMBER_REGEX.finditer(longest_entry.name))) for m in matches: From 2a70e363cd425be610b6cbe67c4e0bce1244b6fc Mon Sep 17 00:00:00 2001 From: xenofem Date: Tue, 6 Feb 2024 13:29:38 -0500 Subject: [PATCH 5/5] more alt versions --- dlibrary/dlibrary.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index 8dee4bd..bb669e9 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -26,7 +26,18 @@ FAKKU_ID_REGEX = re.compile('.*_FAKKU$') TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless') EPILOGUE_REGEX = re.compile('after|後日談') -ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌', 'うつろ目'] +ALT_VERSIONS = [ + '褐色', + '日焼け', + 'pink', + '金髪', + '白肌', + 'うつろ目', + 'dark skin', + 'ラバー', + 'ゾンビ肌', + 'マスク', +] IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']