Display progress for extracting PDF images as well as for analyzing them

This commit is contained in:
xenofem 2024-03-12 15:56:40 -04:00
parent 18fbc7f8dc
commit 26ec1901c3

View file

@ -388,10 +388,14 @@ class Collator:
return None return None
self.dest.mkdir(parents=True, exist_ok=True) self.dest.mkdir(parents=True, exist_ok=True)
print(f'0 pages collated...', end='')
for (idx, image) in enumerate(images, start=self.index): for (idx, image) in enumerate(images, start=self.index):
file_path = self.dest / f'{idx:04d}.{image["ext"]}' file_path = self.dest / f'{idx:04d}.{image["ext"]}'
with open(file_path, 'wb') as f: with open(file_path, 'wb') as f:
f.write(image["image"]) f.write(image["image"])
print(f'\x1b[2K\r{idx+1-self.index} pages collated...', end='')
print()
self.index += pdf.page_count self.index += pdf.page_count
return True return True
@ -575,7 +579,7 @@ def pdf_images(pdf, strategy):
return (extract_image(pdf, images[0][0]) for (images, _) in images_by_page) return (extract_image(pdf, images[0][0]) for (images, _) in images_by_page)
print("Checking PDF images the quick way failed, trying the slow way") print("Checking PDF images the quick way failed, trying the slow way")
print(f'0/{pdf.page_count} pages processed...', end='') print(f'0/{pdf.page_count} pages analyzed...', end='')
image_extractors = [] image_extractors = []
for (idx, page) in enumerate(pdf): for (idx, page) in enumerate(pdf):
page_images = page.get_image_info(xrefs=True) page_images = page.get_image_info(xrefs=True)
@ -610,7 +614,7 @@ def pdf_images(pdf, strategy):
display_sixel_page(page) display_sixel_page(page)
choice = input(f'[N]ope out / [c]onvert page{"" if xref is None else " / e[x]tract image"} / [d]rop page / [s]how page? [n/c{"" if xref is None else "/x"}/d/s] ') choice = input(f'[N]ope out / [c]onvert page{"" if xref is None else " / e[x]tract image"} / [d]rop page / [s]how page? [n/c{"" if xref is None else "/x"}/d/s] ')
print(f'\x1b[2K\r{idx+1}/{pdf.page_count} pages processed...', end=('' if idx+1 < pdf.page_count else '\n')) print(f'\x1b[2K\r{idx+1}/{pdf.page_count} pages analyzed...', end=('' if idx+1 < pdf.page_count else '\n'))
return (extractor() for extractor in image_extractors) return (extractor() for extractor in image_extractors)