detect epilogues in auto collation

2024-02-06 12:48:00 -05:00 · 2024-02-06 12:48:00 -05:00 · 61a7eb07f9
commit 61a7eb07f9
parent ef0c115f0f
1 changed files with 28 additions and 15 deletions
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@ -25,6 +25,7 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
 FAKKU_ID_REGEX = re.compile('.*_FAKKU$')

 TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
+# Marks source names as epilogue content; collate_from_paths passes this to
+# collate_regex_later so that matching sources are collated after the rest.
+EPILOGUE_REGEX = re.compile('after|後日談')
 ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌']

 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
@ -345,6 +346,26 @@ def collate(args):
    collation_staging_area.rmdir()
    con.close()

+def collate_regex_later(srcs, dest, regex, start_index):
+    """Collate srcs so that entries whose name matches regex come last.
+
+    Each source is classified by ``regex.search(src.name)``. If both the
+    matching and the non-matching group are non-empty, the non-matching
+    group is passed to collate_from_paths first (starting at start_index),
+    then the matching group, starting right after whatever count the first
+    call returned.
+
+    Returns:
+        False if the split does not apply (either group is empty),
+        None as soon as one collate_from_paths call returns None,
+        otherwise the sum of the counts returned by both calls.
+
+    NOTE: False is the "no split" sentinel, but a total of 0 compares equal
+    to False (``0 == False``). Callers that must tell the two apart need an
+    identity check (``is False``), not an equality check.
+    """
+    # Single pass over srcs: bucket by whether the name matches.
+    buckets = {True: [], False: []}
+    for src in srcs:
+        buckets[bool(regex.search(src.name))].append(src)
+    earlier, later = buckets[False], buckets[True]
+
+    if not earlier or not later:
+        return False
+
+    # Non-matching sources go first; the running total offsets the second call.
+    total = 0
+    for group in (earlier, later):
+        count = collate_from_paths(group, dest, start_index + total)
+        if count is None:
+            return None
+        total += count
+    return total
+
 def collate_from_paths(srcs, dest, start_index):
    if len(srcs) == 1 and srcs[0].is_dir():
        return collate_from_paths(ls_ignore(srcs[0]), dest, start_index)
@ -356,21 +377,13 @@ def collate_from_paths(srcs, dest, start_index):
    if len(srcs) == 0:
        return 0

-    with_text = []
-    textless = []
-    for src in srcs:
-        if TEXTLESS_REGEX.search(src.name):
-            textless.append(src)
-        else:
-            with_text.append(src)
-    if with_text and textless:
-        text_pages = collate_from_paths(with_text, dest, start_index)
-        if text_pages is None:
-            return None
-        textless_pages = collate_from_paths(textless, dest, start_index+text_pages)
-        if textless_pages is None:
-            return None
-        return text_pages + textless_pages
+    textless_split = collate_regex_later(srcs, dest, TEXTLESS_REGEX, start_index)
+    if textless_split != False:
+        return textless_split
+
+    epilogue_split = collate_regex_later(srcs, dest, EPILOGUE_REGEX, start_index)
+    if epilogue_split != False:
+        return epilogue_split

    if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs):
        ordering = complete_prefix_number_ordering(srcs)