diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index ae7b0c1..386230b 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -32,6 +32,8 @@ import requests
 NUMBER_REGEX = re.compile('[0-90-9]+')
 ALPHABETIC_NUMBERING_REGEX = re.compile('^(?P[^a-za-z0-90-9]*)((?P[a-za-z])(?P[^a-za-z0-90-9]*))?$', re.I)
 
+STRING_TOKENIZE_REGEX = re.compile('(?P<str>[^0-90-9]+)|(?P<num>[0-90-9]+)')
+
 EXTRA_NORMALIZATION_TABLE = str.maketrans({'\u301c': '\uff5e'})
 
 DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
@@ -530,6 +532,10 @@ class Collator:
 
         if all(src.is_file() and is_image(src) for src in srcs):
             ordering = complete_prefix_number_ordering(srcs)
+            if not ordering and self.args.sort:
+                ordering = srcs.copy()
+                ordering.sort(key=best_effort_sort_key)
+                debug(f'Applying best-effort sort: {ordering}')
             if ordering:
                 print(f'Symlinking image files: {ordering[0]}...')
                 return self.link_ordered_files(ordering)
@@ -812,6 +818,19 @@ def nname(entry):
 def nstem(entry):
     return normalize_string(entry.stem)
 
+def best_effort_sort_key(entry):
+    result = []
+    for token in STRING_TOKENIZE_REGEX.finditer(nstem(entry)):
+        if token.lastgroup == 'num':
+            if len(result) == 0:
+                result.append('') # to prevent failed int/string comparisons against other files
+            result.append(int(token.group()))
+        else:
+            result.append(token.group())
+
+    debug(f"Tokenized {entry} as {result}")
+    return result
+
 def complete_prefix_number_ordering(entries):
     if len(entries) == 1:
         return entries
@@ -1460,6 +1479,11 @@ parser_collate.add_argument(
     action='store_true',
     help="only collate works that are explicitly specified",
 )
+parser_collate.add_argument(
+    '-s', '--sort',
+    action='store_true',
+    help="apply a best-effort sorting algorithm when the ordering of image files is unclear",
+)
 parser_collate.add_argument(
     '-p', '--pdf-strategy',
     choices=[