def try_collate_select_language(self, srcs):
    """Collate only the sources whose names match the preferred language.

    The strategy applies when all of the following hold:

    * ``self.locale`` has an entry in ``LANGUAGE_REGEXES``;
    * every source name (as produced by ``nname``) matches at least one
      pattern in ``LANGUAGE_REGEXES``;
    * the pattern for ``self.locale`` matches some, but not all, of the
      sources.

    Returns ``False`` when the strategy does not apply. Otherwise returns
    whatever ``self.collate_from_paths`` returns for the matching subset.
    """
    if self.locale not in LANGUAGE_REGEXES:
        return False

    # Every source must be recognisable as *some* language; otherwise the
    # language markers cannot be used to choose between the sources.
    for src in srcs:
        if not any(
            pattern.search(nname(src))
            for pattern in LANGUAGE_REGEXES.values()
        ):
            return False

    preferred_pattern = LANGUAGE_REGEXES[self.locale]
    preferred_srcs = [
        src for src in srcs if preferred_pattern.search(nname(src))
    ]

    # Nothing to filter if the preferred language matches every source or
    # none of them.
    if len(preferred_srcs) in (0, len(srcs)):
        return False

    return self.collate_from_paths(preferred_srcs)