Compare commits
3 commits
0dcfd1d84a
...
b26a854bdb
Author | SHA1 | Date | |
---|---|---|---|
xenofem | b26a854bdb | ||
xenofem | d33d4816c6 | ||
xenofem | acf99d236b |
|
@ -30,6 +30,7 @@ import rarfile
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
NUMBER_REGEX = re.compile('[0-9０-９]+')
|
NUMBER_REGEX = re.compile('[0-9０-９]+')
|
||||||
|
ALPHABETIC_NUMBERING_REGEX = re.compile('^(?P<prefix>[^a-zａ-ｚ0-9０-９]*)((?P<letter>[a-zａ-ｚ])(?P<suffix>[^a-zａ-ｚ0-9０-９]*))?$', re.I)
|
||||||
|
|
||||||
DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
|
DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
|
||||||
FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
|
FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
|
||||||
|
@ -64,11 +65,13 @@ FRONT_COVER_REGEX = re.compile('(?<!裏)表紙(?!裏)|(?<!back[-_ ])(?<!back)cov
|
||||||
BACK_COVER_REGEX = re.compile('裏表紙|hyou?sh?i[-_ ]?ura|ura[-_ ]?hyou?sh?i', re.I)
|
BACK_COVER_REGEX = re.compile('裏表紙|hyou?sh?i[-_ ]?ura|ura[-_ ]?hyou?sh?i', re.I)
|
||||||
BONUS_REGEX = re.compile('設定|キャラ|特典|ポスター|bonus', re.I)
|
BONUS_REGEX = re.compile('設定|キャラ|特典|ポスター|bonus', re.I)
|
||||||
EPILOGUE_REGEX = re.compile('after|後日談|おまけ', re.I)
|
EPILOGUE_REGEX = re.compile('after|後日談|おまけ', re.I)
|
||||||
|
AFTERWORD_REGEX = re.compile('あとがき', re.I)
|
||||||
SPLITS = [
|
SPLITS = [
|
||||||
{ 'later': TEXTLESS_REGEX },
|
{ 'later': TEXTLESS_REGEX },
|
||||||
{ 'later': FXLESS_REGEX },
|
{ 'later': FXLESS_REGEX },
|
||||||
{ 'earlier': FRONT_COVER_REGEX, 'later': BACK_COVER_REGEX },
|
{ 'earlier': FRONT_COVER_REGEX, 'later': BACK_COVER_REGEX },
|
||||||
{ 'later': BONUS_REGEX },
|
{ 'later': BONUS_REGEX },
|
||||||
|
{ 'later': AFTERWORD_REGEX },
|
||||||
{ 'later': EPILOGUE_REGEX },
|
{ 'later': EPILOGUE_REGEX },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -264,7 +267,7 @@ async def fetch_async(args):
|
||||||
print(f'Fetching DLSite metadata for {work_id}')
|
print(f'Fetching DLSite metadata for {work_id}')
|
||||||
dlsite_metadata = await api.get_work(work_id)
|
dlsite_metadata = await api.get_work(work_id)
|
||||||
if dlsite_metadata.work_type not in [dlsite_async.WorkType.MANGA, dlsite_async.WorkType.CG_ILLUSTRATIONS]:
|
if dlsite_metadata.work_type not in [dlsite_async.WorkType.MANGA, dlsite_async.WorkType.CG_ILLUSTRATIONS]:
|
||||||
print(f'Work {work_id} is not a manga or CG set, skipping')
|
warn(f'Work {work_id} is not a manga or CG set, skipping')
|
||||||
any_warnings = True
|
any_warnings = True
|
||||||
continue
|
continue
|
||||||
db_row = {
|
db_row = {
|
||||||
|
@ -904,20 +907,39 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
|
||||||
def alphabetic_numbering(entries, start_point):
|
def alphabetic_numbering(entries, start_point):
|
||||||
debug(f'Finding alphabetic numbering from start point {start_point} for {entries}')
|
debug(f'Finding alphabetic numbering from start point {start_point} for {entries}')
|
||||||
alphabetized = {}
|
alphabetized = {}
|
||||||
|
prefix_suffix = None
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
ending = nfc(entry.stem)[start_point:].strip(' -_()')
|
ending = nfc(entry.stem)[start_point:]
|
||||||
debug(f'{entry} has ending {ending}')
|
debug(f'{entry} has ending {ending}')
|
||||||
if len(ending) > 1:
|
|
||||||
debug('Ending is more than one character, giving up')
|
ending_match = ALPHABETIC_NUMBERING_REGEX.fullmatch(ending)
|
||||||
|
if not ending_match:
|
||||||
|
debug('Ending has more than one letter, giving up')
|
||||||
return None
|
return None
|
||||||
index = 0 if ending == '' else ord(ending.lower()) - ord('a') + 1
|
|
||||||
if index < 0 or index > 26:
|
current_prefix_suffix = (ending_match.group('prefix'), ending_match.group('suffix') or '')
|
||||||
|
if prefix_suffix is None:
|
||||||
|
prefix_suffix = current_prefix_suffix
|
||||||
|
elif current_prefix_suffix != prefix_suffix:
|
||||||
|
debug(f'Ending prefix/suffix does not match {prefix_suffix}, giving up')
|
||||||
|
return None
|
||||||
|
|
||||||
|
ending_letter = (ending_match.group('letter') or '').lower()
|
||||||
|
if ending_letter == '':
|
||||||
|
index = 0
|
||||||
|
elif ending_letter >= 'a' and ending_letter <= 'z':
|
||||||
|
index = ord(ending_letter) - ord('a') + 1
|
||||||
|
elif ending_letter >= 'ａ' and ending_letter <= 'ｚ':
|
||||||
|
index = ord(ending_letter) - ord('ａ') + 1
|
||||||
|
else:
|
||||||
debug('Ending is not a letter, giving up')
|
debug('Ending is not a letter, giving up')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if (index,) in alphabetized:
|
if (index,) in alphabetized:
|
||||||
debug(f'Index value {index} is already present, giving up')
|
debug(f'Index value {index} is already present, giving up')
|
||||||
return None
|
return None
|
||||||
alphabetized[(index,)] = [entry]
|
alphabetized[(index,)] = [entry]
|
||||||
|
|
||||||
return alphabetized
|
return alphabetized
|
||||||
|
|
||||||
def check_extension(path, exts):
|
def check_extension(path, exts):
|
||||||
|
|
Loading…
Reference in a new issue