From acf99d236b7b8981436e94df909b67c1fb9ea28c Mon Sep 17 00:00:00 2001
From: xenofem
Date: Sat, 20 Apr 2024 13:00:20 -0400
Subject: [PATCH] fancier alphabetic numbering analysis

---
 dlibrary/dlibrary.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index dd5c4c6..9e52d35 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -30,6 +30,7 @@ import rarfile
 import requests
 
 NUMBER_REGEX = re.compile('[0-90-9]+')
+ALPHABETIC_NUMBERING_REGEX = re.compile('^(?P<prefix>[^a-za-z0-90-9]*)((?P<letter>[a-za-z])(?P<suffix>[^a-za-z0-90-9]*))?$', re.I)
 
 DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
 FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
@@ -904,20 +905,39 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
 def alphabetic_numbering(entries, start_point):
     debug(f'Finding alphabetic numbering from start point {start_point} for {entries}')
     alphabetized = {}
+    prefix_suffix = None
     for entry in entries:
-        ending = nfc(entry.stem)[start_point:].strip(' -_()')
+        ending = nfc(entry.stem)[start_point:]
         debug(f'{entry} has ending {ending}')
-        if len(ending) > 1:
-            debug('Ending is more than one character, giving up')
+
+        ending_match = ALPHABETIC_NUMBERING_REGEX.fullmatch(ending)
+        if not ending_match:
+            debug('Ending has more than one letter, giving up')
             return None
-        index = 0 if ending == '' else ord(ending.lower()) - ord('a') + 1
-        if index < 0 or index > 26:
+
+        current_prefix_suffix = (ending_match.group('prefix'), ending_match.group('suffix') or '')
+        if prefix_suffix is None:
+            prefix_suffix = current_prefix_suffix
+        elif current_prefix_suffix != prefix_suffix:
+            debug(f'Ending prefix/suffix does not match {prefix_suffix}, giving up')
+            return None
+
+        ending_letter = (ending_match.group('letter') or '').lower()
+        if ending_letter == '':
+            index = 0
+        elif ending_letter >= 'a' and ending_letter <= 'z':
+            index = ord(ending_letter) - ord('a') + 1
+        elif ending_letter >= 'a' and ending_letter <= 'z':
+            index = ord(ending_letter) - ord('a') + 1
+        else:
             debug('Ending is not a letter, giving up')
             return None
+
         if (index,) in alphabetized:
             debug(f'Index value {index} is already present, giving up')
             return None
         alphabetized[(index,)] = [entry]
+
     return alphabetized
 
 def check_extension(path, exts):