Figure out optimal DPI for PDF page conversion
This commit is contained in:
parent
6a8c8c0f1d
commit
8f798e8c21
|
@ -779,6 +779,25 @@ def display_sixel_pixmap(pixmap_bytes):
|
||||||
finally:
|
finally:
|
||||||
sixel_output_unref(output)
|
sixel_output_unref(output)
|
||||||
|
|
||||||
|
def naive_convert_page(page):
    """Render *page* to PNG at the fixed ``PDF_CONVERSION_DPI`` resolution.

    Returns a dict with ``'ext'`` set to ``'png'`` and ``'image'`` holding
    the PNG-encoded bytes of the rendered page.
    """
    pixmap = page.get_pixmap(dpi=PDF_CONVERSION_DPI)
    png_bytes = pixmap.tobytes('png')
    return {'ext': 'png', 'image': png_bytes}
|
||||||
|
|
||||||
|
def convert_page(pdf, page):
    """Render *page* to PNG, matching the resolution of its displayed image.

    When ``get_displayed_image_xref`` identifies a single image for the page,
    the page is rendered with a zoom factor equal to the ratio between the
    image's pixel width and the width of the rectangle it occupies on the
    page, so the output keeps roughly the image's native resolution.

    The function falls back to ``naive_convert_page`` (fixed
    ``PDF_CONVERSION_DPI``) when:

    * no single image xref is reported for the page,
    * the image has no placement rectangle or the rectangle has no width
      (the scale factor cannot be computed), or
    * the computed scale factor would render below ``PDF_CONVERSION_DPI``.

    Args:
        pdf: The open document, passed through to ``extract_image``.
        page: The page to render.

    Returns:
        A dict with ``'ext'`` set to ``'png'`` and ``'image'`` holding the
        PNG-encoded bytes.
    """
    xref = get_displayed_image_xref(page)
    if xref is None:
        debug('Page has multiple images, converting with naive DPI approach')
        return naive_convert_page(page)

    # Guard against a missing or degenerate placement rectangle: indexing an
    # empty list or dividing by a zero width would otherwise crash the
    # conversion instead of degrading to the fixed-DPI rendering.
    image_rects = page.get_image_rects(xref)
    if not image_rects or image_rects[0].width <= 0:
        debug('Image has no usable placement rectangle, converting with naive DPI approach')
        return naive_convert_page(page)
    image_rect = image_rects[0]

    image = extract_image(pdf, xref)
    # Pixels of source image per page unit along the horizontal axis.
    scale_factor = image['width'] / image_rect.width

    # The threshold treats page units as 1/72 inch, so PDF_CONVERSION_DPI / 72
    # is the zoom factor equivalent to rendering at PDF_CONVERSION_DPI.
    if scale_factor < PDF_CONVERSION_DPI / 72:
        debug(f'Image scale factor is lower than {PDF_CONVERSION_DPI}dpi, using higher dpi instead')
        return naive_convert_page(page)

    debug(f'Page has single image, converting with scale factor {scale_factor}')
    zoom = fitz.Matrix(scale_factor, scale_factor)
    return {'ext': 'png', 'image': page.get_pixmap(matrix=zoom).tobytes('png')}
|
||||||
|
|
||||||
def pdf_image_extractors(pdf, strategy):
|
def pdf_image_extractors(pdf, strategy):
|
||||||
print(f'0/{pdf.page_count} pages analyzed...', end='')
|
print(f'0/{pdf.page_count} pages analyzed...', end='')
|
||||||
image_extractors = []
|
image_extractors = []
|
||||||
|
@ -796,7 +815,7 @@ def pdf_image_extractors(pdf, strategy):
|
||||||
if choice.lower().startswith('c'):
|
if choice.lower().startswith('c'):
|
||||||
if choice == strategy:
|
if choice == strategy:
|
||||||
print(f'Converting page {idx+1}')
|
print(f'Converting page {idx+1}')
|
||||||
image_extractors.append(lambda p=page: { 'ext': 'png', 'image': p.get_pixmap(dpi=PDF_CONVERSION_DPI).tobytes('png') })
|
image_extractors.append(lambda p=pdf, pp=page: convert_page(p, pp))
|
||||||
break
|
break
|
||||||
if xref is not None and (choice.lower().startswith('x') or choice.lower() == 'extract'):
|
if xref is not None and (choice.lower().startswith('x') or choice.lower() == 'extract'):
|
||||||
if choice == strategy:
|
if choice == strategy:
|
||||||
|
|
Loading…
Reference in a new issue