pymupdf TextPage.extractBLOCKS() no longer includes images by default
This commit is contained in:
parent
1d798476c9
commit
83a836f9b5
|
@ -697,7 +697,7 @@ def block_relevant(block):
|
||||||
return block_is_image(block) or not IRRELEVANT_PDF_BLOCK_REGEX.search(block_text(block))
|
return block_is_image(block) or not IRRELEVANT_PDF_BLOCK_REGEX.search(block_text(block))
|
||||||
|
|
||||||
def relevant_blocks(page):
|
def relevant_blocks(page):
|
||||||
blocks = page.get_text('blocks')
|
blocks = page.get_text('blocks', flags=(fitz.TEXTFLAGS_BLOCKS | fitz.TEXT_PRESERVE_IMAGES))
|
||||||
return [block for block in blocks if block_relevant(block)]
|
return [block for block in blocks if block_relevant(block)]
|
||||||
|
|
||||||
def is_single_image(page):
|
def is_single_image(page):
|
||||||
|
|
Loading…
Reference in a new issue