expand bonus regex, refactor regex splitting code

This commit is contained in:
xenofem 2024-02-13 10:45:21 -05:00
parent 1adabbe5e6
commit 7a96bc5655

View file

@ -26,12 +26,20 @@ DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
FANZA_ID_REGEX = re.compile('^d_[0-9]+$') FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
FAKKU_ID_REGEX = re.compile('.*_FAKKU$') FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
TEXTLESS_REGEX = re.compile('(台詞|セリフ|せりふ|テキスト|文字)((な|無)し|抜き)|notext|textless', re.IGNORECASE)
BONUS_REGEX = re.compile('設定', re.IGNORECASE)
EPILOGUE_REGEX = re.compile('after|後日談|おまけ', re.IGNORECASE)
HI_RES_REGEX = re.compile('高解像度', re.IGNORECASE) HI_RES_REGEX = re.compile('高解像度', re.IGNORECASE)
TEXTLESS_REGEX = re.compile('(台詞|セリフ|せりふ|テキスト|文字)((な|無)し|抜き)|notext|textless', re.IGNORECASE)
FRONT_COVER_REGEX = re.compile('(^|[^裏])表紙|cover|hyoushi', re.IGNORECASE) FRONT_COVER_REGEX = re.compile('(^|[^裏])表紙|cover|hyoushi', re.IGNORECASE)
BACK_COVER_REGEX = re.compile('裏表紙', re.IGNORECASE) BACK_COVER_REGEX = re.compile('裏表紙', re.IGNORECASE)
# Case-insensitive pattern for bonus material: '設定' or 'キャラ'.
# Used as the 'later' pattern of the bonus entry in SPLITS.
BONUS_REGEX = re.compile('設定|キャラ', re.IGNORECASE)
# Case-insensitive pattern for epilogue/extra material: 'after', '後日談' or 'おまけ'.
# Used as the 'later' pattern of the epilogue entry in SPLITS.
EPILOGUE_REGEX = re.compile('after|後日談|おまけ', re.IGNORECASE)
# Ordered table of split rules. collate_from_paths walks this list and passes
# each dict as keyword arguments to try_collate_split_regex, returning the
# first result that is not False -- so list order is priority order.
# Allowed keys are the keyword parameters used here: 'earlier' and 'later'.
# NOTE(review): presumably 'earlier' matches are placed before and 'later'
# matches after the remaining items -- confirm against try_collate_split_regex.
SPLITS = [
# Textless variants.
{ 'later': TEXTLESS_REGEX },
# Front cover versus back cover.
{ 'earlier': FRONT_COVER_REGEX, 'later': BACK_COVER_REGEX },
# Bonus material.
{ 'later': BONUS_REGEX },
# Epilogue / extras.
{ 'later': EPILOGUE_REGEX },
]
ALT_VERSIONS = [ ALT_VERSIONS = [
'褐色', '褐色',
'日焼け', '日焼け',
@ -526,21 +534,10 @@ def collate_from_paths(srcs, dest, start_index, exclude):
if len(descendant_files_ignore(lo_res_dir, exclude)) == len(descendant_files_ignore(hi_res_dir, exclude)): if len(descendant_files_ignore(lo_res_dir, exclude)) == len(descendant_files_ignore(hi_res_dir, exclude)):
return collate_from_paths([hi_res_dir], dest, start_index, exclude) return collate_from_paths([hi_res_dir], dest, start_index, exclude)
textless_split = try_collate_split_regex(srcs, dest, start_index, exclude, later=TEXTLESS_REGEX) for regexes in SPLITS:
if textless_split != False: split_attempt = try_collate_split_regex(srcs, dest, start_index, exclude, **regexes)
return textless_split if split_attempt != False:
return split_attempt
cover_split = try_collate_split_regex(srcs, dest, start_index, exclude, earlier=FRONT_COVER_REGEX, later=BACK_COVER_REGEX)
if cover_split != False:
return cover_split
bonus_split = try_collate_split_regex(srcs, dest, start_index, exclude, later=BONUS_REGEX)
if bonus_split != False:
return bonus_split
epilogue_split = try_collate_split_regex(srcs, dest, start_index, exclude, later=EPILOGUE_REGEX)
if epilogue_split != False:
return epilogue_split
if all(src.is_file() and is_image(src) for src in srcs): if all(src.is_file() and is_image(src) for src in srcs):
ordering = complete_prefix_number_ordering(srcs) ordering = complete_prefix_number_ordering(srcs)