Compare commits

..

No commits in common. "8f798e8c210069ddab37d63623c60e4f3fd39b19" and "75f31b83e9021bb340dbbbb6eb27a775b88b7247" have entirely different histories.

View file

@ -742,7 +742,6 @@ def get_displayed_image_xref(page):
def display_sixel_pixmap(pixmap_bytes): def display_sixel_pixmap(pixmap_bytes):
s = BytesIO() s = BytesIO()
image = Image.open(BytesIO(pixmap_bytes)) image = Image.open(BytesIO(pixmap_bytes))
image.thumbnail(size=(800, 800))
width, height = image.size width, height = image.size
try: try:
@ -779,25 +778,6 @@ def display_sixel_pixmap(pixmap_bytes):
finally: finally:
sixel_output_unref(output) sixel_output_unref(output)
def naive_convert_page(page):
    """Render the whole page as a PNG at the configured conversion DPI.

    Args:
        page: Object exposing ``get_pixmap(dpi=...)`` (presumably a PyMuPDF
            page -- confirm against the caller).

    Returns:
        A dict with ``'ext'`` set to ``'png'`` and ``'image'`` holding the
        bytes produced by the pixmap's ``tobytes('png')``.
    """
    rendered = page.get_pixmap(dpi=PDF_CONVERSION_DPI)
    png_bytes = rendered.tobytes('png')
    return {'ext': 'png', 'image': png_bytes}
def convert_page(pdf, page):
    """Render a page to PNG, matching its embedded image's resolution if possible.

    When ``get_displayed_image_xref`` yields a single image xref, the page is
    rendered with a scale factor derived from the image's pixel width and the
    width of the rectangle it occupies on the page. In every other case the
    page is rendered with ``naive_convert_page`` (fixed ``PDF_CONVERSION_DPI``).

    Args:
        pdf: The document handle, passed through to ``extract_image``.
        page: The page to render.

    Returns:
        A dict with ``'ext'`` set to ``'png'`` and ``'image'`` holding the PNG
        bytes of the rendered page.
    """
    xref = get_displayed_image_xref(page)
    if xref is None:
        debug('Page has multiple images, converting with naive DPI approach')
        return naive_convert_page(page)

    image_rects = page.get_image_rects(xref)
    # Without this guard an empty rectangle list raises IndexError and a
    # zero-width rectangle raises ZeroDivisionError below; fall back to the
    # fixed-DPI render instead of aborting the conversion.
    if not image_rects or image_rects[0].width <= 0:
        debug('Image has no usable rectangle on the page, converting with naive DPI approach')
        return naive_convert_page(page)
    image_rect = image_rects[0]

    image = extract_image(pdf, xref)
    # Image pixels per page unit along the horizontal axis.
    scale_factor = image['width'] / image_rect.width

    # A scale of 1 corresponds to 72 dpi, so PDF_CONVERSION_DPI / 72 is the
    # scale the naive approach would use; never render below that.
    if scale_factor < PDF_CONVERSION_DPI / 72:
        debug(f'Image scale factor is lower than {PDF_CONVERSION_DPI}dpi, using higher dpi instead')
        return naive_convert_page(page)

    debug(f'Page has single image, converting with scale factor {scale_factor}')
    pixmap = page.get_pixmap(matrix=fitz.Matrix(scale_factor, scale_factor))
    return {'ext': 'png', 'image': pixmap.tobytes('png')}
def pdf_image_extractors(pdf, strategy): def pdf_image_extractors(pdf, strategy):
print(f'0/{pdf.page_count} pages analyzed...', end='') print(f'0/{pdf.page_count} pages analyzed...', end='')
image_extractors = [] image_extractors = []
@ -815,7 +795,7 @@ def pdf_image_extractors(pdf, strategy):
if choice.lower().startswith('c'): if choice.lower().startswith('c'):
if choice == strategy: if choice == strategy:
print(f'Converting page {idx+1}') print(f'Converting page {idx+1}')
image_extractors.append(lambda p=pdf, pp=page: convert_page(p, pp)) image_extractors.append(lambda p=page: { 'ext': 'png', 'image': p.get_pixmap(dpi=PDF_CONVERSION_DPI).tobytes('png') })
break break
if xref is not None and (choice.lower().startswith('x') or choice.lower() == 'extract'): if xref is not None and (choice.lower().startswith('x') or choice.lower() == 'extract'):
if choice == strategy: if choice == strategy:
@ -831,6 +811,7 @@ def pdf_image_extractors(pdf, strategy):
display_sixel_pixmap(page.get_pixmap(dpi=PDF_PREVIEW_DPI).tobytes('png')) display_sixel_pixmap(page.get_pixmap(dpi=PDF_PREVIEW_DPI).tobytes('png'))
if xref is not None: if xref is not None:
pixmap = fitz.Pixmap(pdf, xref) pixmap = fitz.Pixmap(pdf, xref)
pixmap.shrink(2)
display_sixel_pixmap(pixmap.tobytes('png')) display_sixel_pixmap(pixmap.tobytes('png'))
choice = input(f'[N]ope out / [c]onvert page{"" if xref is None else " / e[x]tract image"} / [d]rop page / [s]how page? [n/c{"" if xref is None else "/x"}/d/s] ') choice = input(f'[N]ope out / [c]onvert page{"" if xref is None else " / e[x]tract image"} / [d]rop page / [s]how page? [n/c{"" if xref is None else "/x"}/d/s] ')