From 51243aca6e087ed25b30d2e832f488cf7d80b891 Mon Sep 17 00:00:00 2001 From: xenofem Date: Wed, 7 Feb 2024 17:12:02 -0500 Subject: [PATCH 1/2] add more textless regex, fix bug in alphabetized uniqueness checker --- dlibrary/dlibrary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index 02371dc..189fde7 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -26,7 +26,7 @@ DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$') FANZA_ID_REGEX = re.compile('^d_[0-9]+$') FAKKU_ID_REGEX = re.compile('.*_FAKKU$') -TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless') +TEXTLESS_REGEX = re.compile('(台詞|セリフ|テキスト|文字)(な|無)し|notext|textless') EPILOGUE_REGEX = re.compile('after|後日談') ALT_VERSIONS = [ '褐色', @@ -310,7 +310,7 @@ def alphabetic_numbering(entries, start_point): if len(ending) > 1: return None index = 0 if ending == '' else ord(ending.lower()) - ord('a') - if index in alphabetized: + if (index,) in alphabetized: return None alphabetized[(index,)] = [entry] indices = list(alphabetized.keys()) From 9ff18f933b6eaf691486ba8011fd23e730fa6784 Mon Sep 17 00:00:00 2001 From: xenofem Date: Wed, 7 Feb 2024 17:42:18 -0500 Subject: [PATCH 2/2] detect cover images and hi-res editions --- dlibrary/dlibrary.py | 53 ++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index 189fde7..374386f 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -28,6 +28,8 @@ FAKKU_ID_REGEX = re.compile('.*_FAKKU$') TEXTLESS_REGEX = re.compile('(台詞|セリフ|テキスト|文字)(な|無)し|notext|textless') EPILOGUE_REGEX = re.compile('after|後日談') +HI_RES_REGEX = re.compile('高解像度') +COVER_REGEX = re.compile('表紙') ALT_VERSIONS = [ '褐色', '日焼け', @@ -387,25 +389,36 @@ def collate(args): collation_staging_area.rmdir() con.close() -def collate_regex_later(srcs, dest, regex, start_index): - matching = [] - nonmatching = [] +def collate_split_regex(srcs, dest, start_index, earlier=None, later=None): + early_srcs = [] + middle_srcs = [] + late_srcs = [] for src in srcs: - if regex.search(src.name): - matching.append(src) + if earlier and earlier.search(src.name): + early_srcs.append(src) + elif later and later.search(src.name): + late_srcs.append(src) else: - nonmatching.append(src) + middle_srcs.append(src) - if not (matching and nonmatching): + if sum(1 for l in [early_srcs, middle_srcs, late_srcs] if l) <= 1: return False - nonmatching_pages = collate_from_paths(nonmatching, dest, start_index) - if nonmatching_pages is None: + early_page_count = collate_from_paths(early_srcs, dest, start_index) + if early_page_count is None: return None - matching_pages = collate_from_paths(matching, dest, start_index+nonmatching_pages) - if matching_pages is None: + start_index += early_page_count + + middle_page_count = collate_from_paths(middle_srcs, dest, start_index) + if middle_page_count is None: return None - return nonmatching_pages + matching_pages + start_index += middle_page_count + + late_page_count = collate_from_paths(late_srcs, dest, start_index) + if late_page_count is None: + return None + + return early_page_count + middle_page_count + late_page_count def standalone_image_size(filepath): with Image.open(filepath) as im: @@ -441,14 +454,26 @@ def collate_from_paths(srcs, dest, start_index): if len(srcs) == 0: return 0 - textless_split = collate_regex_later(srcs, dest, TEXTLESS_REGEX, start_index) + if len(srcs) == 2 and all(src.is_dir() for src in srcs): + hi_res_dirs = [src for src in srcs if HI_RES_REGEX.search(src.name)] + if len(hi_res_dirs) == 1: + hi_res_dir = hi_res_dirs[0] + lo_res_dir = next(src for src in srcs if src != hi_res_dir) + if len(descendant_files_ignore(lo_res_dir)) == len(descendant_files_ignore(hi_res_dir)): + return collate_from_paths([hi_res_dir], dest, start_index) + + textless_split = collate_split_regex(srcs, dest, start_index, later=TEXTLESS_REGEX) if textless_split != False: return textless_split - epilogue_split = collate_regex_later(srcs, dest, EPILOGUE_REGEX, start_index) + epilogue_split = collate_split_regex(srcs, dest, start_index, later=EPILOGUE_REGEX) if epilogue_split != False: return epilogue_split + cover_split = collate_split_regex(srcs, dest, start_index, earlier=COVER_REGEX) + if cover_split != False: + return cover_split + if all(src.is_file() and src.suffix.lower() in IMAGE_FILE_EXTENSIONS for src in srcs): ordering = complete_prefix_number_ordering(srcs) if ordering: