detect cover images and hi-res editions
This commit is contained in:
parent
51243aca6e
commit
9ff18f933b
|
@ -28,6 +28,8 @@ FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
|
||||||
|
|
||||||
TEXTLESS_REGEX = re.compile('(台詞|セリフ|テキスト|文字)(な|無)し|notext|textless')
|
TEXTLESS_REGEX = re.compile('(台詞|セリフ|テキスト|文字)(な|無)し|notext|textless')
|
||||||
EPILOGUE_REGEX = re.compile('after|後日談')
|
EPILOGUE_REGEX = re.compile('after|後日談')
|
||||||
|
HI_RES_REGEX = re.compile('高解像度')
|
||||||
|
COVER_REGEX = re.compile('表紙')
|
||||||
ALT_VERSIONS = [
|
ALT_VERSIONS = [
|
||||||
'褐色',
|
'褐色',
|
||||||
'日焼け',
|
'日焼け',
|
||||||
|
@ -387,25 +389,36 @@ def collate(args):
|
||||||
collation_staging_area.rmdir()
|
collation_staging_area.rmdir()
|
||||||
con.close()
|
con.close()
|
||||||
|
|
||||||
def collate_regex_later(srcs, dest, regex, start_index):
|
def collate_split_regex(srcs, dest, start_index, earlier=None, later=None):
|
||||||
matching = []
|
early_srcs = []
|
||||||
nonmatching = []
|
middle_srcs = []
|
||||||
|
late_srcs = []
|
||||||
for src in srcs:
|
for src in srcs:
|
||||||
if regex.search(src.name):
|
if earlier and earlier.search(src.name):
|
||||||
matching.append(src)
|
early_srcs.append(src)
|
||||||
|
elif later and later.search(src.name):
|
||||||
|
late_srcs.append(src)
|
||||||
else:
|
else:
|
||||||
nonmatching.append(src)
|
middle_srcs.append(src)
|
||||||
|
|
||||||
if not (matching and nonmatching):
|
if sum(1 for l in [early_srcs, middle_srcs, late_srcs] if l) <= 1:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
nonmatching_pages = collate_from_paths(nonmatching, dest, start_index)
|
early_page_count = collate_from_paths(early_srcs, dest, start_index)
|
||||||
if nonmatching_pages is None:
|
if early_page_count is None:
|
||||||
return None
|
return None
|
||||||
matching_pages = collate_from_paths(matching, dest, start_index+nonmatching_pages)
|
start_index += early_page_count
|
||||||
if matching_pages is None:
|
|
||||||
|
middle_page_count = collate_from_paths(middle_srcs, dest, start_index)
|
||||||
|
if middle_page_count is None:
|
||||||
return None
|
return None
|
||||||
return nonmatching_pages + matching_pages
|
start_index += middle_page_count
|
||||||
|
|
||||||
|
late_page_count = collate_from_paths(late_srcs, dest, start_index)
|
||||||
|
if late_page_count is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return early_page_count + middle_page_count + late_page_count
|
||||||
|
|
||||||
def standalone_image_size(filepath):
|
def standalone_image_size(filepath):
|
||||||
with Image.open(filepath) as im:
|
with Image.open(filepath) as im:
|
||||||
|
@ -441,14 +454,26 @@ def collate_from_paths(srcs, dest, start_index):
|
||||||
if len(srcs) == 0:
|
if len(srcs) == 0:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
textless_split = collate_regex_later(srcs, dest, TEXTLESS_REGEX, start_index)
|
if len(srcs) == 2 and all(src.is_dir() for src in srcs):
|
||||||
|
hi_res_dirs = [src for src in srcs if HI_RES_REGEX.search(src.name)]
|
||||||
|
if len(hi_res_dirs) == 1:
|
||||||
|
hi_res_dir = hi_res_dirs[0]
|
||||||
|
lo_res_dir = next(src for src in srcs if src != hi_res_dir)
|
||||||
|
if len(descendant_files_ignore(lo_res_dir)) == len(descendant_files_ignore(hi_res_dir)):
|
||||||
|
return collate_from_paths([hi_res_dir], dest, start_index)
|
||||||
|
|
||||||
|
textless_split = collate_split_regex(srcs, dest, start_index, later=TEXTLESS_REGEX)
|
||||||
if textless_split != False:
|
if textless_split != False:
|
||||||
return textless_split
|
return textless_split
|
||||||
|
|
||||||
epilogue_split = collate_regex_later(srcs, dest, EPILOGUE_REGEX, start_index)
|
epilogue_split = collate_split_regex(srcs, dest, start_index, later=EPILOGUE_REGEX)
|
||||||
if epilogue_split != False:
|
if epilogue_split != False:
|
||||||
return epilogue_split
|
return epilogue_split
|
||||||
|
|
||||||
|
cover_split = collate_split_regex(srcs, dest, start_index, earlier=COVER_REGEX)
|
||||||
|
if cover_split != False:
|
||||||
|
return cover_split
|
||||||
|
|
||||||
if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs):
|
if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs):
|
||||||
ordering = complete_prefix_number_ordering(srcs)
|
ordering = complete_prefix_number_ordering(srcs)
|
||||||
if ordering:
|
if ordering:
|
||||||
|
|
Loading…
Reference in a new issue