figure out optimal DPI for pdf page conversion

don't accidentally downscale embedded images in-place while displaying previews
2024-08-04 01:18:18 -04:00 · 2024-08-04 00:59:48 -04:00
1 changed file with 21 additions and 2 deletions
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@ -742,6 +742,7 @@ def get_displayed_image_xref(page):
 def display_sixel_pixmap(pixmap_bytes):
    s = BytesIO()
    image = Image.open(BytesIO(pixmap_bytes))
    image.thumbnail(size=(800, 800))
    width, height = image.size
    try:
@ -778,6 +779,25 @@ def display_sixel_pixmap(pixmap_bytes):
    finally:
        sixel_output_unref(output)
 def naive_convert_page(page):
    """Render the whole page to PNG at the fixed PDF_CONVERSION_DPI.

    Returns a dict with 'ext' set to 'png' and 'image' set to the encoded
    PNG bytes of the rendered page.
    """
    rendered = page.get_pixmap(dpi=PDF_CONVERSION_DPI)
    png_bytes = rendered.tobytes('png')
    return {'ext': 'png', 'image': png_bytes}
 def convert_page(pdf, page):
    """Convert a PDF page to PNG at a resolution suited to its embedded image.

    If get_displayed_image_xref reports a single image for the page, the page
    is rendered with a zoom factor equal to the extracted image's 'width'
    (presumably its pixel width -- confirm against extract_image) divided by
    the width of the rectangle the image occupies on the page. If that factor
    would be lower than PDF_CONVERSION_DPI / 72, or no usable factor can be
    computed, the page is rendered at PDF_CONVERSION_DPI instead via
    naive_convert_page.

    Returns a dict with 'ext' ('png') and 'image' (the encoded PNG bytes).
    """
    xref = get_displayed_image_xref(page)
    if xref is None:
        debug('Page has multiple images, converting with naive DPI approach')
        return naive_convert_page(page)

    image_rects = page.get_image_rects(xref)
    # Without a placement rectangle of positive width there is nothing to
    # divide by; fall back to the fixed-DPI render rather than raising
    # IndexError / ZeroDivisionError.
    if not image_rects or image_rects[0].width <= 0:
        debug('Could not determine displayed image size, converting with naive DPI approach')
        return naive_convert_page(page)

    image = extract_image(pdf, xref)
    scale_factor = image['width'] / image_rects[0].width

    # Never render below the configured baseline resolution.
    if scale_factor < PDF_CONVERSION_DPI / 72:
        debug(f'Image scale factor is lower than {PDF_CONVERSION_DPI}dpi, using higher dpi instead')
        return naive_convert_page(page)

    debug(f'Page has single image, converting with scale factor {scale_factor}')
    zoom = fitz.Matrix(scale_factor, scale_factor)
    return {'ext': 'png', 'image': page.get_pixmap(matrix=zoom).tobytes('png')}
 def pdf_image_extractors(pdf, strategy):
    print(f'0/{pdf.page_count} pages analyzed...', end='')
    image_extractors = []
@ -795,7 +815,7 @@ def pdf_image_extractors(pdf, strategy):
                if choice.lower().startswith('c'):
                    if choice == strategy:
                        print(f'Converting page {idx+1}')
-                    image_extractors.append(lambda p=page: { 'ext': 'png', 'image': p.get_pixmap(dpi=PDF_CONVERSION_DPI).tobytes('png') })
+                    image_extractors.append(lambda p=pdf, pp=page: convert_page(p, pp))
                    break
                if xref is not None and (choice.lower().startswith('x') or choice.lower() == 'extract'):
                    if choice == strategy:
@ -811,7 +831,6 @@ def pdf_image_extractors(pdf, strategy):
                    display_sixel_pixmap(page.get_pixmap(dpi=PDF_PREVIEW_DPI).tobytes('png'))
                    if xref is not None:
                        pixmap = fitz.Pixmap(pdf, xref)
                        pixmap.shrink(2)
                        display_sixel_pixmap(pixmap.tobytes('png'))
                choice = input(f'[N]ope out / [c]onvert page{"" if xref is None else " / e[x]tract image"} / [d]rop page / [s]how page? [n/c{"" if xref is None else "/x"}/d/s] ')
Author	SHA1	Message	Date
xenofem	8f798e8c21	figure out optimal DPI for pdf page conversion	2024-08-04 01:18:18 -04:00
xenofem	6a8c8c0f1d	don't accidentally downscale embedded images in-place while displaying previews	2024-08-04 00:59:48 -04:00