From 6a8c8c0f1d2a7727aeb329cdf10f81b72983acad Mon Sep 17 00:00:00 2001 From: xenofem Date: Sun, 4 Aug 2024 00:59:48 -0400 Subject: [PATCH 1/2] don't accidentally downscale embedded images in-place while displaying previews --- dlibrary/dlibrary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index 61a1391..8bb45aa 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -742,6 +742,7 @@ def get_displayed_image_xref(page): def display_sixel_pixmap(pixmap_bytes): s = BytesIO() image = Image.open(BytesIO(pixmap_bytes)) + image.thumbnail(size=(800, 800)) width, height = image.size try: @@ -811,7 +812,6 @@ def pdf_image_extractors(pdf, strategy): display_sixel_pixmap(page.get_pixmap(dpi=PDF_PREVIEW_DPI).tobytes('png')) if xref is not None: pixmap = fitz.Pixmap(pdf, xref) - pixmap.shrink(2) display_sixel_pixmap(pixmap.tobytes('png')) choice = input(f'[N]ope out / [c]onvert page{"" if xref is None else " / e[x]tract image"} / [d]rop page / [s]how page? [n/c{"" if xref is None else "/x"}/d/s] ') From 8f798e8c210069ddab37d63623c60e4f3fd39b19 Mon Sep 17 00:00:00 2001 From: xenofem Date: Sun, 4 Aug 2024 01:18:18 -0400 Subject: [PATCH 2/2] figure out optimal DPI for pdf page conversion --- dlibrary/dlibrary.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index 8bb45aa..049e583 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -779,6 +779,25 @@ def display_sixel_pixmap(pixmap_bytes): finally: sixel_output_unref(output) +def naive_convert_page(page): + return { 'ext': 'png', 'image': page.get_pixmap(dpi=PDF_CONVERSION_DPI).tobytes('png') } + +def convert_page(pdf, page): + xref = get_displayed_image_xref(page) + if xref is None: + debug('Page has multiple images, converting with naive DPI approach') + return naive_convert_page(page) + + image_rect = page.get_image_rects(xref)[0] + image = extract_image(pdf, xref) + scale_factor = image['width'] / image_rect.width + if scale_factor < PDF_CONVERSION_DPI / 72: + debug(f'Image scale factor is lower than {PDF_CONVERSION_DPI}dpi, using higher dpi instead') + return naive_convert_page(page) + + debug(f'Page has single image, converting with scale factor {scale_factor}') + return { 'ext': 'png', 'image': page.get_pixmap(matrix=fitz.Matrix(scale_factor, scale_factor)).tobytes('png') } + def pdf_image_extractors(pdf, strategy): print(f'0/{pdf.page_count} pages analyzed...', end='') image_extractors = [] @@ -796,7 +815,7 @@ def pdf_image_extractors(pdf, strategy): if choice.lower().startswith('c'): if choice == strategy: print(f'Converting page {idx+1}') - image_extractors.append(lambda p=page: { 'ext': 'png', 'image': p.get_pixmap(dpi=PDF_CONVERSION_DPI).tobytes('png') }) + image_extractors.append(lambda p=pdf, pp=page: convert_page(p, pp)) break if xref is not None and (choice.lower().startswith('x') or choice.lower() == 'extract'): if choice == strategy: