fancier alphabetic numbering analysis

This commit is contained in:
xenofem 2024-04-20 13:00:20 -04:00
parent 0dcfd1d84a
commit acf99d236b

View file

@@ -30,6 +30,7 @@ import rarfile
import requests import requests
# Fullwidth characters are written as \uXXXX escapes so the patterns survive
# tools that drop non-ASCII text:
#   U+FF10-U+FF19  fullwidth digits 0-9
#   U+FF41-U+FF5A  fullwidth lowercase letters a-z

# A run of ASCII or fullwidth digits.
NUMBER_REGEX = re.compile('[0-9\uff10-\uff19]+')

# An "ending" made of at most one letter (ASCII or fullwidth, any case because
# of re.I), optionally wrapped in non-alphanumeric decoration. Groups:
#   prefix - decoration before the letter (may be empty)
#   letter - the single letter, or None when the ending contains no letter
#   suffix - decoration after the letter, or None when there is no letter
ALPHABETIC_NUMBERING_REGEX = re.compile(
    '^(?P<prefix>[^a-z\uff41-\uff5a0-9\uff10-\uff19]*)'
    '((?P<letter>[a-z\uff41-\uff5a])'
    '(?P<suffix>[^a-z\uff41-\uff5a0-9\uff10-\uff19]*))?$',
    re.I,
)

# "BJ" or "RJ" followed by one or more ASCII digits.
DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
# "d_" followed by one or more ASCII digits.
FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
@@ -904,20 +905,39 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
def alphabetic_numbering(entries, start_point):
    """Try to order entries by a single trailing letter in their stem.

    Each entry's stem is normalised with ``nfc`` and everything from
    ``start_point`` onwards is matched against the module-level
    ``ALPHABETIC_NUMBERING_REGEX``, which must expose the named groups
    ``prefix``, ``letter`` and ``suffix``. An ending with no letter gets
    index 0; ``a``..``z`` (ASCII or fullwidth, any case) get 1..26.

    Args:
        entries: iterable of objects with a ``stem`` attribute
            (presumably ``pathlib.Path`` instances -- confirm with callers).
        start_point: offset into the normalised stem where the ending begins.

    Returns:
        A dict mapping the 1-tuple ``(index,)`` to ``[entry]``, or ``None``
        when any ending does not match the regex, the prefix/suffix
        decoration differs between entries, the letter is outside the
        supported alphabets, or two entries map to the same index.
    """
    # Fullwidth lowercase 'a' and 'z', spelled as escapes so the literals
    # cannot be silently dropped by tools that strip non-ASCII text.
    fullwidth_a = '\uff41'
    fullwidth_z = '\uff5a'

    debug(f'Finding alphabetic numbering from start point {start_point} for {entries}')
    alphabetized = {}
    prefix_suffix = None
    for entry in entries:
        ending = nfc(entry.stem)[start_point:]
        debug(f'{entry} has ending {ending}')

        ending_match = ALPHABETIC_NUMBERING_REGEX.fullmatch(ending)
        if not ending_match:
            debug('Ending has more than one letter, giving up')
            return None

        # All entries must share the same decoration around the letter;
        # the first entry seen establishes the reference pair.
        current_prefix_suffix = (
            ending_match.group('prefix'),
            ending_match.group('suffix') or '',
        )
        if prefix_suffix is None:
            prefix_suffix = current_prefix_suffix
        elif current_prefix_suffix != prefix_suffix:
            debug(f'Ending prefix/suffix does not match {prefix_suffix}, giving up')
            return None

        ending_letter = (ending_match.group('letter') or '').lower()
        # Under re.IGNORECASE, [a-z] also matches a few non-ASCII letters.
        # One of them (U+0130) lowercases to two code points, which would
        # make ord() raise; the length check routes it to the rejection
        # branch instead.
        is_single_char = len(ending_letter) == 1
        if ending_letter == '':
            index = 0
        elif is_single_char and 'a' <= ending_letter <= 'z':
            index = ord(ending_letter) - ord('a') + 1
        elif is_single_char and fullwidth_a <= ending_letter <= fullwidth_z:
            index = ord(ending_letter) - ord(fullwidth_a) + 1
        else:
            debug('Ending is not a letter, giving up')
            return None

        if (index,) in alphabetized:
            debug(f'Index value {index} is already present, giving up')
            return None
        alphabetized[(index,)] = [entry]
    return alphabetized
def check_extension(path, exts): def check_extension(path, exts):