updates for everyone

This commit is contained in:
xenofem 2025-09-25 00:45:33 -04:00
parent fda5f2bbba
commit 2c7484b538
4 changed files with 1283 additions and 895 deletions

View file

@ -1,7 +1,7 @@
use std::{collections::HashMap, sync::Arc};
use lazy_static::lazy_static;
use pdf::{backend::Backend, content::{Op, Point, TextDrawAdjusted}};
use pdf::{backend::Backend, content::{Op, Point, TextDrawAdjusted}, file::{NoCache, NoLog}};
use regex::Regex;
use time::Date;
@ -28,7 +28,7 @@ pub enum Error {
}
impl DataSet {
pub fn extract<B: Backend>(doc: &pdf::file::File<B>) -> Result<Self, Error> {
pub fn extract<B: Backend>(doc: &pdf::file::File<B, NoCache, NoCache, NoLog>) -> Result<Self, Error> {
let mut doc_iter = DocumentIterator::new(doc).peekable();
let mut columns: Vec<(Arc<String>, f32)> = Vec::new();
@ -113,13 +113,13 @@ struct DocumentIterator<'a> {
}
impl<'a> DocumentIterator<'a> {
fn new<B: Backend>(document: &'a pdf::file::File<B>) -> Self {
fn new<B: Backend>(document: &'a pdf::file::File<B, NoCache, NoCache, NoLog>) -> Self {
Self {
point: Point { x: 0.0, y: 0.0 },
operations: Box::new(
document
.pages()
.filter_map(|page| Some(page.ok()?.contents.clone()?.operations(document).ok()?.into_iter()))
.filter_map(|page| Some(page.ok()?.contents.clone()?.operations(&document.resolver()).ok()?.into_iter()))
.flatten(),
),
}