diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index b9fe859..99e7b72 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -269,6 +269,10 @@ class Collator:
         if len(srcs) == 0:
             return True
 
+        select_language = self.try_collate_select_language(srcs)
+        if select_language is not False:
+            return select_language
+
         if len(srcs) == 2 and all(src.is_dir() for src in srcs):
             for quality in IMAGE_QUALITY_REGEXES:
                 def a_not_b(a, b, src):
@@ -417,6 +421,32 @@ class Collator:
         else:
             return False
 
+    def try_collate_select_language(self, srcs):
+        """Collate only the sources whose names match the current locale.
+
+        Applies when self.locale has an entry in LANGUAGE_REGEXES, every
+        source name matches at least one regex in LANGUAGE_REGEXES, and the
+        locale's own regex selects a non-empty proper subset of srcs.
+        Returns False when those conditions do not hold; otherwise returns
+        whatever collate_from_paths() returns for the matching sources.
+        """
+        if self.locale not in LANGUAGE_REGEXES:
+            return False
+
+        # Bail out unless every source name matches some language regex.
+        for src in srcs:
+            if not any(regex.search(nname(src)) for regex in LANGUAGE_REGEXES.values()):
+                return False
+
+        locale_regex = LANGUAGE_REGEXES[self.locale]
+        preferred = [src for src in srcs if locale_regex.search(nname(src))]
+
+        # Selecting every source, or none of them, would not narrow anything.
+        if len(preferred) in (0, len(srcs)):
+            return False
+
+        return self.collate_from_paths(preferred)
+
 def image_xrefs(pdf):
     images_by_page = [page.get_images() for page in pdf]
     if all(len(images) == 1 for images in images_by_page):