Compare commits

...

5 Commits

1 changed file with 64 additions and 26 deletions

View File

@ -25,7 +25,19 @@ FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
ALT_VERSIONS = ['褐色', '日焼け', 'pink']
EPILOGUE_REGEX = re.compile('after|後日談')
ALT_VERSIONS = [
'褐色',
'日焼け',
'pink',
'金髪',
'白肌',
'うつろ目',
'dark skin',
'ラバー',
'ゾンビ肌',
'マスク',
]
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
@ -201,8 +213,11 @@ def complete_prefix_number_ordering(entries):
entries_by_version = {}
for entry in entries:
version = next(ver for ver in (ALT_VERSIONS + ['']) if ver in entry.name)
entries_by_version.setdefault(version, []).append(entry)
version_code = 0
for (i, version) in enumerate(ALT_VERSIONS):
if version in entry.name:
version_code |= (1 << i)
entries_by_version.setdefault(version_code, []).append(entry)
numberings_by_version = {ver: unique_hierarchical_prefix_numbering(entries_by_version[ver]) for ver in entries_by_version}
@ -211,6 +226,7 @@ def complete_prefix_number_ordering(entries):
if numbering is None:
return None
unified_indices |= set(numbering.keys())
unified_indices.discard(None)
unified_indices = list(unified_indices)
unified_indices.sort()
@ -224,6 +240,8 @@ def complete_prefix_number_ordering(entries):
return None
break
unified_indices.append(None)
versions = list(numberings_by_version.keys())
versions.sort()
@ -244,26 +262,34 @@ def complete_prefix_number_ordering(entries):
return result
def unique_hierarchical_prefix_numbering(entries, start_point=0):
matches = reversed(list(NUMBER_REGEX.finditer(entries[0].name)))
if len(entries) == 1 and not NUMBER_REGEX.search(entries[0].name):
return {None: entries}
longest_entry = max(entries, key=lambda e: len(e.name))
matches = reversed(list(NUMBER_REGEX.finditer(longest_entry.name)))
for m in matches:
pos = m.start()
if pos < start_point:
return None
prefix = entries[0].name[:pos]
if all(e.name.startswith(prefix) for e in entries):
prefix = longest_entry.name[:pos]
if all(e.name.startswith(prefix) or prefix.startswith(e.stem) for e in entries):
numbering = {}
for e in entries:
n = NUMBER_REGEX.match(e.name[pos:])
if n is None:
return None
i = int(n.group())
if pos >= len(e.stem):
i = 0
else:
n = NUMBER_REGEX.match(e.name[pos:])
if n is None:
return None
i = int(n.group())
numbering.setdefault((i,), []).append(e)
indices = list(numbering.keys())
for idx in indices:
if len(numbering[idx]) > 1:
ents_idx = numbering.pop(idx)
next_layer_start = pos + NUMBER_REGEX.match(ents_idx[0].name[pos:]).end()
longest = max(ents_idx, key=lambda e: len(e.name))
next_layer_start = pos + NUMBER_REGEX.match(longest.name[pos:]).end()
sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start) or alphabetic_numbering(ents_idx, next_layer_start)
if not sub_numbering:
return None
@ -345,6 +371,26 @@ def collate(args):
collation_staging_area.rmdir()
con.close()
def collate_regex_later(srcs, dest, regex, start_index):
    """Collate ``srcs`` with the entries whose name matches ``regex`` placed last.

    The sources are partitioned by whether ``regex.search(src.name)`` finds a
    match. The non-matching group is passed to ``collate_from_paths`` first,
    starting at ``start_index``; the matching group follows, starting right
    after the pages produced by the first group.

    Returns:
        ``False`` when the regex does not actually split the sources (one of
        the two groups is empty), in which case nothing is collated here.
        ``None`` when either ``collate_from_paths`` call returns ``None``.
        Otherwise the sum of the two page counts.

    Note:
        In Python ``0 == False``, so a caller that needs to tell a zero page
        total apart from the "no split" result must test with ``is False``
        rather than ``== False`` / ``!= False``.
    """
    # Single pass over srcs: bucket each entry by whether its name matches.
    buckets = {True: [], False: []}
    for path in srcs:
        buckets[bool(regex.search(path.name))].append(path)
    later, earlier = buckets[True], buckets[False]

    # A split only makes sense when both sides are populated.
    if not later or not earlier:
        return False

    earlier_pages = collate_from_paths(earlier, dest, start_index)
    if earlier_pages is None:
        return None

    # Matching entries continue numbering after the non-matching ones.
    later_pages = collate_from_paths(later, dest, start_index + earlier_pages)
    if later_pages is None:
        return None

    return earlier_pages + later_pages
def collate_from_paths(srcs, dest, start_index):
if len(srcs) == 1 and srcs[0].is_dir():
return collate_from_paths(ls_ignore(srcs[0]), dest, start_index)
@ -356,21 +402,13 @@ def collate_from_paths(srcs, dest, start_index):
if len(srcs) == 0:
return 0
with_text = []
textless = []
for src in srcs:
if TEXTLESS_REGEX.search(src.name):
textless.append(src)
else:
with_text.append(src)
if with_text and textless:
text_pages = collate_from_paths(with_text, dest, start_index)
if text_pages is None:
return None
textless_pages = collate_from_paths(textless, dest, start_index+text_pages)
if textless_pages is None:
return None
return text_pages + textless_pages
textless_split = collate_regex_later(srcs, dest, TEXTLESS_REGEX, start_index)
if textless_split != False:
return textless_split
epilogue_split = collate_regex_later(srcs, dest, EPILOGUE_REGEX, start_index)
if epilogue_split != False:
return epilogue_split
if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs):
ordering = complete_prefix_number_ordering(srcs)