add best-effort sort option

This commit is contained in:
xenofem 2024-05-28 00:33:00 -04:00
parent 0b159ebba7
commit c26300d752

View file

@ -32,6 +32,8 @@ import requests
# NOTE(review): every numeric/alphabetic character class below ends in a bare
# '-' (e.g. '[0-9-]'). That looks like the remnant of a full-width range that
# was lost in transit (such as a full-width digit or letter range) -- confirm
# against the repository before relying on the literal-hyphen reading.

# One or more consecutive characters from the numeric class.
NUMBER_REGEX = re.compile('[0-9-]+')
# Whole-string match, case-insensitive: a 'prefix' of characters outside the
# alphanumeric class, optionally followed by exactly one 'letter' and a
# 'suffix' of characters outside the alphanumeric class.
ALPHABETIC_NUMBERING_REGEX = re.compile('^(?P<prefix>[^a-z-0-9-]*)((?P<letter>[a-z-])(?P<suffix>[^a-z-0-9-]*))?$', re.I)
# Splits text into alternating runs: group 'str' for characters outside the
# numeric class, group 'num' for characters inside it.
STRING_TOKENIZE_REGEX = re.compile('(?P<str>[^0-9-]+)|(?P<num>[0-9-]+)')
# str.translate() table mapping U+301C (WAVE DASH) to U+FF5E (FULLWIDTH TILDE).
EXTRA_NORMALIZATION_TABLE = str.maketrans({'\u301c': '\uff5e'})
# Whole-string match: 'B' or 'R', then 'J', then one or more ASCII digits.
DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
@ -530,6 +532,10 @@ class Collator:
if all(src.is_file() and is_image(src) for src in srcs):
ordering = complete_prefix_number_ordering(srcs)
if not ordering and self.args.sort:
ordering = srcs.copy()
ordering.sort(key=best_effort_sort_key)
debug(f'Applying best-effort sort: {ordering}')
if ordering:
print(f'Symlinking image files: {ordering[0]}...')
return self.link_ordered_files(ordering)
@ -812,6 +818,19 @@ def nname(entry):
def nstem(entry):
    """Return ``entry.stem`` after passing it through ``normalize_string``."""
    stem = entry.stem
    return normalize_string(stem)
def best_effort_sort_key(entry):
    """Build a list sort key from the text and number runs in an entry's stem.

    The normalized stem of ``entry`` is split with ``STRING_TOKENIZE_REGEX``.
    Runs matched by its ``num`` group are converted with ``int`` so they
    compare by value; all other runs are kept as strings.

    Returns:
        A list of alternating string and integer tokens, always starting
        with a string (an empty one if the stem begins with a number).
    """
    key = []
    for match in STRING_TOKENIZE_REGEX.finditer(nstem(entry)):
        text = match.group()
        if match.lastgroup != 'num':
            key.append(text)
            continue
        if not key:
            # Pad with an empty string so a stem that starts with a number
            # still yields a key whose first element is a string; otherwise
            # comparing it against another file's key could pit int vs. str.
            key.append('')
        key.append(int(text))
    debug(f"Tokenized {entry} as {key}")
    return key
def complete_prefix_number_ordering(entries):
if len(entries) == 1:
return entries
@ -1460,6 +1479,11 @@ parser_collate.add_argument(
action='store_true',
help="only collate works that are explicitly specified",
)
# Opt-in flag: off unless -s/--sort is given (store_true).
parser_collate.add_argument(
    '-s',
    '--sort',
    help="apply a best-effort sorting algorithm when the ordering of image files is unclear",
    action='store_true',
)
parser_collate.add_argument(
'-p', '--pdf-strategy',
choices=[