more alt versions

handle combinations of versions, and cases where things have versions but not numbers
handle cases where the first item isn't numbered
2024-02-06 13:29:38 -05:00 · 2024-02-06 13:20:56 -05:00 · 2024-02-06 13:00:44 -05:00 · 2024-02-06 12:48:00 -05:00 · 2024-02-06 12:30:13 -05:00
1 changed file with 64 additions and 26 deletions
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@ -25,7 +25,19 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
 FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
 TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
-ALT_VERSIONS = ['褐色', '日焼け', 'pink']
+EPILOGUE_REGEX = re.compile('after|後日談')
 # Substrings that mark an alternate version of an image when they occur in
 # an entry's name. complete_prefix_number_ordering sets bit i of an entry's
 # version code when ALT_VERSIONS[i] is found in the name, so an item's
 # position in this list determines which bit it contributes.
 ALT_VERSIONS = [
    '褐色',
    '日焼け',
    'pink',
    '金髪',
    '白肌',
    'うつろ目',
    'dark skin',
    'ラバー',
    'ゾンビ肌',
    'マスク',
 ]
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
@ -201,8 +213,11 @@ def complete_prefix_number_ordering(entries):
    entries_by_version = {}
    for entry in entries:
-        version = next(ver for ver in (ALT_VERSIONS + ['']) if ver in entry.name)
+        version_code = 0
-        entries_by_version.setdefault(version, []).append(entry)
+        for (i, version) in enumerate(ALT_VERSIONS):
            if version in entry.name:
                version_code |= (1 << i)
        entries_by_version.setdefault(version_code, []).append(entry)
    numberings_by_version = {ver: unique_hierarchical_prefix_numbering(entries_by_version[ver]) for ver in entries_by_version}
@ -211,6 +226,7 @@ def complete_prefix_number_ordering(entries):
        if numbering is None:
            return None
        unified_indices |= set(numbering.keys())
    unified_indices.discard(None)
    unified_indices = list(unified_indices)
    unified_indices.sort()
@ -224,6 +240,8 @@ def complete_prefix_number_ordering(entries):
                        return None
                    break
    unified_indices.append(None)
    versions = list(numberings_by_version.keys())
    versions.sort()
@ -244,15 +262,22 @@ def complete_prefix_number_ordering(entries):
    return result
 def unique_hierarchical_prefix_numbering(entries, start_point=0):
-    matches = reversed(list(NUMBER_REGEX.finditer(entries[0].name)))
+    if len(entries) == 1 and not NUMBER_REGEX.search(entries[0].name):
        return {None: entries}
    longest_entry = max(entries, key=lambda e: len(e.name))
    matches = reversed(list(NUMBER_REGEX.finditer(longest_entry.name)))
    for m in matches:
        pos = m.start()
        if pos < start_point:
            return None
-        prefix = entries[0].name[:pos]
+        prefix = longest_entry.name[:pos]
-        if all(e.name.startswith(prefix) for e in entries):
+        if all(e.name.startswith(prefix) or prefix.startswith(e.stem) for e in entries):
            numbering = {}
            for e in entries:
                if pos >= len(e.stem):
                    i = 0
                else:
                    n = NUMBER_REGEX.match(e.name[pos:])
                    if n is None:
                        return None
@ -263,7 +288,8 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
            for idx in indices:
                if len(numbering[idx]) > 1:
                    ents_idx = numbering.pop(idx)
-                    next_layer_start = pos + NUMBER_REGEX.match(ents_idx[0].name[pos:]).end()
+                    longest = max(ents_idx, key=lambda e: len(e.name))
                    next_layer_start = pos + NUMBER_REGEX.match(longest.name[pos:]).end()
                    sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start) or alphabetic_numbering(ents_idx, next_layer_start)
                    if not sub_numbering:
                        return None
@ -345,6 +371,26 @@ def collate(args):
    collation_staging_area.rmdir()
    con.close()
 def collate_regex_later(srcs, dest, regex, start_index):
    """Collate the sources whose names match *regex* after all the others.

    Each source is tested with ``regex.search(src.name)`` and placed in one
    of two groups. The non-matching group is handed to collate_from_paths
    first, starting at *start_index*; the matching group follows, starting
    at *start_index* plus the value returned for the first group.

    Returns:
        False if either group is empty, i.e. there is nothing to split
        (collate_from_paths is not called in that case).
        None if either collate_from_paths call returns None.
        Otherwise the sum of the two values returned by collate_from_paths
        (presumably page counts -- confirm against collate_from_paths).
    """
    # Partition in a single pass; keys are the truthiness of the regex hit.
    groups = {True: [], False: []}
    for path in srcs:
        groups[bool(regex.search(path.name))].append(path)
    deferred, leading = groups[True], groups[False]

    # The split only makes sense when both groups are populated.
    if not deferred or not leading:
        return False

    leading_total = collate_from_paths(leading, dest, start_index)
    if leading_total is None:
        return None

    deferred_total = collate_from_paths(deferred, dest, start_index + leading_total)
    if deferred_total is None:
        return None

    return leading_total + deferred_total
 def collate_from_paths(srcs, dest, start_index):
    if len(srcs) == 1 and srcs[0].is_dir():
        return collate_from_paths(ls_ignore(srcs[0]), dest, start_index)
@ -356,21 +402,13 @@ def collate_from_paths(srcs, dest, start_index):
    if len(srcs) == 0:
        return 0
-    with_text = []
+    textless_split = collate_regex_later(srcs, dest, TEXTLESS_REGEX, start_index)
-    textless = []
+    if textless_split != False:
-    for src in srcs:
+        return textless_split
-        if TEXTLESS_REGEX.search(src.name):
+
-            textless.append(src)
+    epilogue_split = collate_regex_later(srcs, dest, EPILOGUE_REGEX, start_index)
-        else:
+    if epilogue_split != False:
-            with_text.append(src)
+        return epilogue_split
    if with_text and textless:
        text_pages = collate_from_paths(with_text, dest, start_index)
        if text_pages is None:
            return None
        textless_pages = collate_from_paths(textless, dest, start_index+text_pages)
        if textless_pages is None:
            return None
        return text_pages + textless_pages
    if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs):
        ordering = complete_prefix_number_ordering(srcs)
Author	SHA1	Message	Date
xenofem	2a70e363cd	more alt versions	2024-02-06 13:29:38 -05:00
xenofem	26af5286dc	handle combinations of versions, and cases where things have versions but not numbers	2024-02-06 13:20:56 -05:00
xenofem	85ac6bc35c	handle cases where the first item isn't numbered	2024-02-06 13:00:44 -05:00
xenofem	61a7eb07f9	detect epilogues in auto collation	2024-02-06 12:48:00 -05:00
xenofem	ef0c115f0f	add more alt versions	2024-02-06 12:30:13 -05:00