filter by language preference when collating

This commit is contained in:
xenofem 2024-03-02 00:13:30 -05:00
parent 0be720599d
commit 65017abe00

View file

@ -269,6 +269,10 @@ class Collator:
if len(srcs) == 0: if len(srcs) == 0:
return True return True
select_language = self.try_collate_select_language(srcs)
if select_language is not False:
return select_language
if len(srcs) == 2 and all(src.is_dir() for src in srcs): if len(srcs) == 2 and all(src.is_dir() for src in srcs):
for quality in IMAGE_QUALITY_REGEXES: for quality in IMAGE_QUALITY_REGEXES:
def a_not_b(a, b, src): def a_not_b(a, b, src):
@ -417,6 +421,18 @@ class Collator:
else: else:
return False return False
def try_collate_select_language(self, srcs):
    """Try to collate only the sources tagged with the preferred language.

    A source counts as tagged with a language when that language's regex
    from LANGUAGE_REGEXES finds a match in ``nname(src)``.

    Returns False (meaning "this strategy does not apply") when:
      * ``self.locale`` has no entry in LANGUAGE_REGEXES,
      * at least one source is not tagged with any known language, or
      * the preferred language matches every source or none of them, so
        filtering on it would not narrow anything down.

    Otherwise returns whatever ``self.collate_from_paths`` returns for the
    subset of sources tagged with the preferred language.
    """
    if self.locale not in LANGUAGE_REGEXES:
        return False

    # Every source must carry some recognised language tag; an untagged
    # source means the set is not a clean "one copy per language" layout.
    for candidate in srcs:
        for pattern in LANGUAGE_REGEXES.values():
            if pattern.search(nname(candidate)):
                break
        else:
            return False

    locale_pattern = LANGUAGE_REGEXES[self.locale]
    preferred = []
    for candidate in srcs:
        if locale_pattern.search(nname(candidate)):
            preferred.append(candidate)

    # All-or-nothing matches give no basis for choosing between sources.
    if not preferred or len(preferred) == len(srcs):
        return False

    return self.collate_from_paths(preferred)
def image_xrefs(pdf): def image_xrefs(pdf):
images_by_page = [page.get_images() for page in pdf] images_by_page = [page.get_images() for page in pdf]
if all(len(images) == 1 for images in images_by_page): if all(len(images) == 1 for images in images_by_page):