From ef0c115f0ff997c7e296ad3ec749688e394911e5 Mon Sep 17 00:00:00 2001
From: xenofem <xenofem@xeno.science>
Date: Tue, 6 Feb 2024 12:29:32 -0500
Subject: [PATCH 1/5] add more alt versions

---
 dlibrary/dlibrary.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index deaf3ee..f49bee6 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -25,7 +25,7 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
 FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
 
 TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
-ALT_VERSIONS = ['褐色', '日焼け', 'pink']
+ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌']
 
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
 

From 61a7eb07f90ab4a0674fbd9e562d8dcbdc0c2de0 Mon Sep 17 00:00:00 2001
From: xenofem <xenofem@xeno.science>
Date: Tue, 6 Feb 2024 12:48:00 -0500
Subject: [PATCH 2/5] detect epilogues in auto collation

---
 dlibrary/dlibrary.py | 43 ++++++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index f49bee6..ef8bbb8 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -25,6 +25,7 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
 FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
 
 TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
+EPILOGUE_REGEX = re.compile('after|後日談')
 ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌']
 
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
@@ -345,6 +346,26 @@ def collate(args):
     collation_staging_area.rmdir()
     con.close()
 
+def collate_regex_later(srcs, dest, regex, start_index):  # collate srcs so that entries whose name matches regex come after the rest
+    matching = []  # entries whose .name matches regex; collated second
+    nonmatching = []  # every other entry; collated first
+    for src in srcs:
+        if regex.search(src.name):  # search(), so a match anywhere in the name counts
+            matching.append(src)
+        else:
+            nonmatching.append(src)
+
+    if not (matching and nonmatching):  # only split when both groups are non-empty
+        return False  # False means "no split done"; NOTE: a numeric result of 0 also compares == False, so callers should test identity
+
+    nonmatching_pages = collate_from_paths(nonmatching, dest, start_index)  # presumably the number of pages written -- confirm against collate_from_paths
+    if nonmatching_pages is None:  # a None result from collate_from_paths is passed straight through
+        return None
+    matching_pages = collate_from_paths(matching, dest, start_index+nonmatching_pages)  # second group starts where the first group's result left off
+    if matching_pages is None:
+        return None
+    return nonmatching_pages + matching_pages  # combined result of both groups
+
 def collate_from_paths(srcs, dest, start_index):
     if len(srcs) == 1 and srcs[0].is_dir():
         return collate_from_paths(ls_ignore(srcs[0]), dest, start_index)
@@ -356,21 +377,13 @@ def collate_from_paths(srcs, dest, start_index):
     if len(srcs) == 0:
         return 0
 
-    with_text = []
-    textless = []
-    for src in srcs:
-        if TEXTLESS_REGEX.search(src.name):
-            textless.append(src)
-        else:
-            with_text.append(src)
-    if with_text and textless:
-        text_pages = collate_from_paths(with_text, dest, start_index)
-        if text_pages is None:
-            return None
-        textless_pages = collate_from_paths(textless, dest, start_index+text_pages)
-        if textless_pages is None:
-            return None
-        return text_pages + textless_pages
+    textless_split = collate_regex_later(srcs, dest, TEXTLESS_REGEX, start_index)
+    if textless_split != False:
+        return textless_split
+
+    epilogue_split = collate_regex_later(srcs, dest, EPILOGUE_REGEX, start_index)
+    if epilogue_split != False:
+        return epilogue_split
 
     if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs):
         ordering = complete_prefix_number_ordering(srcs)

From 85ac6bc35c145015c09f8c8846d567b8213a9f38 Mon Sep 17 00:00:00 2001
From: xenofem <xenofem@xeno.science>
Date: Tue, 6 Feb 2024 13:00:44 -0500
Subject: [PATCH 3/5] handle cases where the first item isn't numbered

---
 dlibrary/dlibrary.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index ef8bbb8..4e2eea6 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -245,26 +245,31 @@ def complete_prefix_number_ordering(entries):
     return result
 
 def unique_hierarchical_prefix_numbering(entries, start_point=0):
-    matches = reversed(list(NUMBER_REGEX.finditer(entries[0].name)))
+    longest_entry = max(entries, key=lambda e: len(e.name))
+    matches = reversed(list(NUMBER_REGEX.finditer(longest_entry.name)))
     for m in matches:
         pos = m.start()
         if pos < start_point:
             return None
-        prefix = entries[0].name[:pos]
-        if all(e.name.startswith(prefix) for e in entries):
+        prefix = longest_entry.name[:pos]
+        if all(e.name.startswith(prefix) or prefix.startswith(e.stem) for e in entries):
             numbering = {}
             for e in entries:
-                n = NUMBER_REGEX.match(e.name[pos:])
-                if n is None:
-                    return None
-                i = int(n.group())
+                if pos >= len(e.stem):
+                    i = 0
+                else:
+                    n = NUMBER_REGEX.match(e.name[pos:])
+                    if n is None:
+                        return None
+                    i = int(n.group())
                 numbering.setdefault((i,), []).append(e)
 
             indices = list(numbering.keys())
             for idx in indices:
                 if len(numbering[idx]) > 1:
                     ents_idx = numbering.pop(idx)
-                    next_layer_start = pos + NUMBER_REGEX.match(ents_idx[0].name[pos:]).end()
+                    longest = max(ents_idx, key=lambda e: len(e.name))
+                    next_layer_start = pos + NUMBER_REGEX.match(longest.name[pos:]).end()
                     sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start) or alphabetic_numbering(ents_idx, next_layer_start)
                     if not sub_numbering:
                         return None

From 26af5286dcf06f1a0eb2d771097fce614de37758 Mon Sep 17 00:00:00 2001
From: xenofem <xenofem@xeno.science>
Date: Tue, 6 Feb 2024 13:20:56 -0500
Subject: [PATCH 4/5] handle combinations of versions, and cases where things
 have versions but not numbers

---
 dlibrary/dlibrary.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index 4e2eea6..8dee4bd 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -26,7 +26,7 @@ FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
 
 TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
 EPILOGUE_REGEX = re.compile('after|後日談')
-ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌']
+ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌', 'うつろ目']
 
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
 
@@ -202,8 +202,11 @@ def complete_prefix_number_ordering(entries):
 
     entries_by_version = {}
     for entry in entries:
-        version = next(ver for ver in (ALT_VERSIONS + ['']) if ver in entry.name)
-        entries_by_version.setdefault(version, []).append(entry)
+        version_code = 0
+        for (i, version) in enumerate(ALT_VERSIONS):
+            if version in entry.name:
+                version_code |= (1 << i)
+        entries_by_version.setdefault(version_code, []).append(entry)
 
     numberings_by_version = {ver: unique_hierarchical_prefix_numbering(entries_by_version[ver]) for ver in entries_by_version}
 
@@ -212,6 +215,7 @@ def complete_prefix_number_ordering(entries):
         if numbering is None:
             return None
         unified_indices |= set(numbering.keys())
+    unified_indices.discard(None)
     unified_indices = list(unified_indices)
     unified_indices.sort()
 
@@ -225,6 +229,8 @@ def complete_prefix_number_ordering(entries):
                         return None
                     break
 
+    unified_indices.append(None)
+
     versions = list(numberings_by_version.keys())
     versions.sort()
 
@@ -245,6 +251,9 @@ def complete_prefix_number_ordering(entries):
     return result
 
 def unique_hierarchical_prefix_numbering(entries, start_point=0):
+    if len(entries) == 1 and not NUMBER_REGEX.search(entries[0].name):
+        return {None: entries}
+
     longest_entry = max(entries, key=lambda e: len(e.name))
     matches = reversed(list(NUMBER_REGEX.finditer(longest_entry.name)))
     for m in matches:

From 2a70e363cd425be610b6cbe67c4e0bce1244b6fc Mon Sep 17 00:00:00 2001
From: xenofem <xenofem@xeno.science>
Date: Tue, 6 Feb 2024 13:29:38 -0500
Subject: [PATCH 5/5] more alt versions

---
 dlibrary/dlibrary.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index 8dee4bd..bb669e9 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -26,7 +26,18 @@ FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
 
 TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
 EPILOGUE_REGEX = re.compile('after|後日談')
-ALT_VERSIONS = ['褐色', '日焼け', 'pink', '金髪', '白肌', 'うつろ目']
+ALT_VERSIONS = [  # substrings looked for in entry names; list position i is used as bit i of the version code, so order is significant
+    '褐色',  # Japanese: "brown" (skin tone)
+    '日焼け',  # Japanese: "suntan"
+    'pink',
+    '金髪',  # Japanese: "blonde hair"
+    '白肌',  # Japanese: "white skin"
+    'うつろ目',  # Japanese: "vacant eyes"
+    'dark skin',
+    'ラバー',  # Japanese: "rubber"
+    'ゾンビ肌',  # Japanese: "zombie skin"
+    'マスク',  # Japanese: "mask"
+]
 
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']