From 61a7eb07f90ab4a0674fbd9e562d8dcbdc0c2de0 Mon Sep 17 00:00:00 2001 From: xenofem Date: Tue, 6 Feb 2024 12:48:00 -0500 Subject: [PATCH] detect epilogues in auto collation --- dlibrary/dlibrary.py | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index f49bee6..ef8bbb8 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -25,6 +25,7 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$') FAKKU_ID_REGEX = re.compile('.*_FAKKU$') TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless') +EPILOGUE_REGEX = re.compile('after|後日談') ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌'] IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff'] @@ -345,6 +346,26 @@ def collate(args): collation_staging_area.rmdir() con.close() +def collate_regex_later(srcs, dest, regex, start_index): + matching = [] + nonmatching = [] + for src in srcs: + if regex.search(src.name): + matching.append(src) + else: + nonmatching.append(src) + + if not (matching and nonmatching): + return False + + nonmatching_pages = collate_from_paths(nonmatching, dest, start_index) + if nonmatching_pages is None: + return None + matching_pages = collate_from_paths(matching, dest, start_index+nonmatching_pages) + if matching_pages is None: + return None + return nonmatching_pages + matching_pages + def collate_from_paths(srcs, dest, start_index): if len(srcs) == 1 and srcs[0].is_dir(): return collate_from_paths(ls_ignore(srcs[0]), dest, start_index) @@ -356,21 +377,13 @@ def collate_from_paths(srcs, dest, start_index): if len(srcs) == 0: return 0 - with_text = [] - textless = [] - for src in srcs: - if TEXTLESS_REGEX.search(src.name): - textless.append(src) - else: - with_text.append(src) - if with_text and textless: - text_pages = collate_from_paths(with_text, dest, start_index) - if text_pages is None: - return None - textless_pages = collate_from_paths(textless, dest, start_index+text_pages) - if textless_pages is None: - return None - return text_pages + textless_pages + textless_split = collate_regex_later(srcs, dest, TEXTLESS_REGEX, start_index) + if textless_split != False: + return textless_split + + epilogue_split = collate_regex_later(srcs, dest, EPILOGUE_REGEX, start_index) + if epilogue_split != False: + return epilogue_split if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs): ordering = complete_prefix_number_ordering(srcs)