diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index bb669e9..deaf3ee 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -25,19 +25,7 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$') FAKKU_ID_REGEX = re.compile('.*_FAKKU$') TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless') -EPILOGUE_REGEX = re.compile('after|後日談') -ALT_VERSIONS = [ - '褐色', - '日焼け', - 'pink', - '金髪', - '白肌', - 'うつろ目', - 'dark skin', - 'ラバー', - 'ゾンビ肌', - 'マスク', -] +ALT_VERSIONS = ['褐色', '日焼け', 'pink'] IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff'] @@ -213,11 +201,8 @@ def complete_prefix_number_ordering(entries): entries_by_version = {} for entry in entries: - version_code = 0 - for (i, version) in enumerate(ALT_VERSIONS): - if version in entry.name: - version_code |= (1 << i) - entries_by_version.setdefault(version_code, []).append(entry) + version = next(ver for ver in (ALT_VERSIONS + ['']) if ver in entry.name) + entries_by_version.setdefault(version, []).append(entry) numberings_by_version = {ver: unique_hierarchical_prefix_numbering(entries_by_version[ver]) for ver in entries_by_version} @@ -226,7 +211,6 @@ def complete_prefix_number_ordering(entries): if numbering is None: return None unified_indices |= set(numbering.keys()) - unified_indices.discard(None) unified_indices = list(unified_indices) unified_indices.sort() @@ -240,8 +224,6 @@ def complete_prefix_number_ordering(entries): return None break - unified_indices.append(None) - versions = list(numberings_by_version.keys()) versions.sort() @@ -262,34 +244,26 @@ def complete_prefix_number_ordering(entries): return result def unique_hierarchical_prefix_numbering(entries, start_point=0): - if len(entries) == 1 and not NUMBER_REGEX.search(entries[0].name): - return {None: entries} - - longest_entry = max(entries, key=lambda e: len(e.name)) - matches = reversed(list(NUMBER_REGEX.finditer(longest_entry.name))) + matches = reversed(list(NUMBER_REGEX.finditer(entries[0].name))) for m in matches: pos = m.start() if pos < start_point: return None - prefix = longest_entry.name[:pos] - if all(e.name.startswith(prefix) or prefix.startswith(e.stem) for e in entries): + prefix = entries[0].name[:pos] + if all(e.name.startswith(prefix) for e in entries): numbering = {} for e in entries: - if pos >= len(e.stem): - i = 0 - else: - n = NUMBER_REGEX.match(e.name[pos:]) - if n is None: - return None - i = int(n.group()) + n = NUMBER_REGEX.match(e.name[pos:]) + if n is None: + return None + i = int(n.group()) numbering.setdefault((i,), []).append(e) indices = list(numbering.keys()) for idx in indices: if len(numbering[idx]) > 1: ents_idx = numbering.pop(idx) - longest = max(ents_idx, key=lambda e: len(e.name)) - next_layer_start = pos + NUMBER_REGEX.match(longest.name[pos:]).end() + next_layer_start = pos + NUMBER_REGEX.match(ents_idx[0].name[pos:]).end() sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start) or alphabetic_numbering(ents_idx, next_layer_start) if not sub_numbering: return None @@ -371,26 +345,6 @@ def collate(args): collation_staging_area.rmdir() con.close() -def collate_regex_later(srcs, dest, regex, start_index): - matching = [] - nonmatching = [] - for src in srcs: - if regex.search(src.name): - matching.append(src) - else: - nonmatching.append(src) - - if not (matching and nonmatching): - return False - - nonmatching_pages = collate_from_paths(nonmatching, dest, start_index) - if nonmatching_pages is None: - return None - matching_pages = collate_from_paths(matching, dest, start_index+nonmatching_pages) - if matching_pages is None: - return None - return nonmatching_pages + matching_pages - def collate_from_paths(srcs, dest, start_index): if len(srcs) == 1 and srcs[0].is_dir(): return collate_from_paths(ls_ignore(srcs[0]), dest, start_index) @@ -402,13 +356,21 @@ def collate_from_paths(srcs, dest, start_index): if len(srcs) == 0: return 0 - textless_split = collate_regex_later(srcs, dest, TEXTLESS_REGEX, start_index) - if textless_split != False: - return textless_split - - epilogue_split = collate_regex_later(srcs, dest, EPILOGUE_REGEX, start_index) - if epilogue_split != False: - return epilogue_split + with_text = [] + textless = [] + for src in srcs: + if TEXTLESS_REGEX.search(src.name): + textless.append(src) + else: + with_text.append(src) + if with_text and textless: + text_pages = collate_from_paths(with_text, dest, start_index) + if text_pages is None: + return None + textless_pages = collate_from_paths(textless, dest, start_index+text_pages) + if textless_pages is None: + return None + return text_pages + textless_pages if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs): ordering = complete_prefix_number_ordering(srcs)