From 51243aca6e087ed25b30d2e832f488cf7d80b891 Mon Sep 17 00:00:00 2001
From: xenofem <xenofem@xeno.science>
Date: Wed, 7 Feb 2024 17:12:02 -0500
Subject: [PATCH 1/2] add more textless regex, fix bug in alphabetized
 uniqueness checker

---
 dlibrary/dlibrary.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index 02371dc..189fde7 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -26,7 +26,7 @@ DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
 FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
 FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
 
-TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
+TEXTLESS_REGEX = re.compile('(台詞|セリフ|テキスト|文字)(な|無)し|notext|textless')
 EPILOGUE_REGEX = re.compile('after|後日談')
 ALT_VERSIONS = [
     '褐色',
@@ -310,7 +310,7 @@ def alphabetic_numbering(entries, start_point):
         if len(ending) > 1:
             return None
         index = 0 if ending == '' else ord(ending.lower()) - ord('a')
-        if index in alphabetized:
+        if (index,) in alphabetized:
             return None
         alphabetized[(index,)] = [entry]
     indices = list(alphabetized.keys())

From 9ff18f933b6eaf691486ba8011fd23e730fa6784 Mon Sep 17 00:00:00 2001
From: xenofem <xenofem@xeno.science>
Date: Wed, 7 Feb 2024 17:42:18 -0500
Subject: [PATCH 2/2] detect cover images and hi-res editions

---
 dlibrary/dlibrary.py | 53 ++++++++++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index 189fde7..374386f 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -28,6 +28,8 @@ FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
 
 TEXTLESS_REGEX = re.compile('(台詞|セリフ|テキスト|文字)(な|無)し|notext|textless')
 EPILOGUE_REGEX = re.compile('after|後日談')
+HI_RES_REGEX = re.compile('高解像度')
+COVER_REGEX = re.compile('表紙')
 ALT_VERSIONS = [
     '褐色',
     '日焼け',
@@ -387,25 +389,36 @@ def collate(args):
     collation_staging_area.rmdir()
     con.close()
 
-def collate_regex_later(srcs, dest, regex, start_index):
-    matching = []
-    nonmatching = []
+def collate_split_regex(srcs, dest, start_index, earlier=None, later=None):
+    early_srcs = []
+    middle_srcs = []
+    late_srcs = []
     for src in srcs:
-        if regex.search(src.name):
-            matching.append(src)
+        if earlier and earlier.search(src.name):
+            early_srcs.append(src)
+        elif later and later.search(src.name):
+            late_srcs.append(src)
         else:
-            nonmatching.append(src)
+            middle_srcs.append(src)
 
-    if not (matching and nonmatching):
+    if sum(1 for l in [early_srcs, middle_srcs, late_srcs] if l) <= 1:
         return False
 
-    nonmatching_pages = collate_from_paths(nonmatching, dest, start_index)
-    if nonmatching_pages is None:
+    early_page_count = collate_from_paths(early_srcs, dest, start_index)
+    if early_page_count is None:
         return None
-    matching_pages = collate_from_paths(matching, dest, start_index+nonmatching_pages)
-    if matching_pages is None:
+    start_index += early_page_count
+
+    middle_page_count = collate_from_paths(middle_srcs, dest, start_index)
+    if middle_page_count is None:
         return None
-    return nonmatching_pages + matching_pages
+    start_index += middle_page_count
+
+    late_page_count = collate_from_paths(late_srcs, dest, start_index)
+    if late_page_count is None:
+        return None
+
+    return early_page_count + middle_page_count + late_page_count
 
 def standalone_image_size(filepath):
     with Image.open(filepath) as im:
@@ -441,14 +454,26 @@ def collate_from_paths(srcs, dest, start_index):
     if len(srcs) == 0:
         return 0
 
-    textless_split = collate_regex_later(srcs, dest, TEXTLESS_REGEX, start_index)
+    if len(srcs) == 2 and all(src.is_dir() for src in srcs):
+        hi_res_dirs = [src for src in srcs if HI_RES_REGEX.search(src.name)]
+        if len(hi_res_dirs) == 1:
+            hi_res_dir = hi_res_dirs[0]
+            lo_res_dir = next(src for src in srcs if src != hi_res_dir)
+            if len(descendant_files_ignore(lo_res_dir)) == len(descendant_files_ignore(hi_res_dir)):
+                return collate_from_paths([hi_res_dir], dest, start_index)
+
+    textless_split = collate_split_regex(srcs, dest, start_index, later=TEXTLESS_REGEX)
     if textless_split != False:
         return textless_split
 
-    epilogue_split = collate_regex_later(srcs, dest, EPILOGUE_REGEX, start_index)
+    epilogue_split = collate_split_regex(srcs, dest, start_index, later=EPILOGUE_REGEX)
     if epilogue_split != False:
         return epilogue_split
 
+    cover_split = collate_split_regex(srcs, dest, start_index, earlier=COVER_REGEX)
+    if cover_split != False:
+        return cover_split
+
     if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs):
         ordering = complete_prefix_number_ordering(srcs)
         if ordering: