fancier alphabetic numbering analysis

2024-04-20 13:00:20 -04:00 · 2024-04-20 13:00:20 -04:00 · acf99d236b
commit acf99d236b
parent 0dcfd1d84a
1 changed file with 25 additions and 5 deletions
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -30,6 +30,7 @@ import rarfile
 import requests

 NUMBER_REGEX = re.compile('[0-9０-９]+')
+ALPHABETIC_NUMBERING_REGEX = re.compile('^(?P<prefix>[^a-zａ-ｚ0-9０-９]*)((?P<letter>[a-zａ-ｚ])(?P<suffix>[^a-zａ-ｚ0-9０-９]*))?$', re.I)

 DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
 FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
@@ -904,20 +905,39 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
 def alphabetic_numbering(entries, start_point):
    debug(f'Finding alphabetic numbering from start point {start_point} for {entries}')
    alphabetized = {}
+    prefix_suffix = None
    for entry in entries:
-        ending = nfc(entry.stem)[start_point:].strip(' -_()')
+        ending = nfc(entry.stem)[start_point:]
        debug(f'{entry} has ending {ending}')
-        if len(ending) > 1:
-            debug('Ending is more than one character, giving up')
+
+        ending_match = ALPHABETIC_NUMBERING_REGEX.fullmatch(ending)
+        if not ending_match:
+            debug('Ending has more than one letter, giving up')
            return None
-        index = 0 if ending == '' else ord(ending.lower()) - ord('a') + 1
-        if index < 0 or index > 26:
+
+        current_prefix_suffix = (ending_match.group('prefix'), ending_match.group('suffix') or '')
+        if prefix_suffix is None:
+            prefix_suffix = current_prefix_suffix
+        elif current_prefix_suffix != prefix_suffix:
+            debug(f'Ending prefix/suffix does not match {prefix_suffix}, giving up')
+            return None
+
+        ending_letter = (ending_match.group('letter') or '').lower()
+        if ending_letter == '':
+            index = 0
+        elif ending_letter >= 'a' and ending_letter <= 'z':
+            index = ord(ending_letter) - ord('a') + 1
+        elif ending_letter >= 'ａ' and ending_letter <= 'ｚ':
+            index = ord(ending_letter) - ord('ａ') + 1
+        else:
            debug('Ending is not a letter, giving up')
            return None
+
        if (index,) in alphabetized:
            debug(f'Index value {index} is already present, giving up')
            return None
        alphabetized[(index,)] = [entry]
+
    return alphabetized

 def check_extension(path, exts):