updates for everyone
parent fda5f2bbba
commit 2c7484b538
4 changed files with 1283 additions and 895 deletions
Cargo.lock (generated): 2151 changes
File diff suppressed because it is too large
Cargo.toml: 10 changes
@@ -14,12 +14,12 @@ bytes = "1.1"
 futures = "0.3"
 lazy_static = "1.4"
 log = "0.4.16"
-pdf = "0.8"
+pdf = "0.9"
 regex = "1.5.5"
-reqwest = { version = "0.11", features = ["rustls-tls", "stream"], default-features = false }
-scraper = "0.12"
-simple_logger = { version = "4.2", features = ["stderr"] }
-thiserror = "1"
+reqwest = { version = "0.12.23", features = ["rustls-tls", "stream"], default-features = false }
+scraper = "0.24"
+simple_logger = { version = "5", features = ["stderr"] }
+thiserror = "2"
 time = { version = "0.3.9", features = ["formatting", "macros", "parsing"] }
 tokio = { version = "1", features = ["full"] }
 tokio-stream = "0.1.8"
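Of these bumps, pdf (0.8 to 0.9), scraper (0.12 to 0.24), simple_logger (4 to 5), and thiserror (1 to 2) are semver-major; pdf 0.9 is the one that forces the source edits below, because it moves the object-cache and log policy into File's type parameters. A minimal sketch, built only from the pdf 0.9 items visible in this diff, of an alias that would name the uncached configuration once (the alias is hypothetical, not part of this commit):

// Hypothetical alias, not introduced by this commit: pdf 0.9's File takes
// object-cache, stream-cache, and log types, so the uncached configuration
// used throughout this repo could be named in one place.
use pdf::file::{File, NoCache, NoLog};

pub type UncachedPdf<B> = File<B, NoCache, NoCache, NoLog>;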
@@ -1,7 +1,7 @@
 use std::{collections::HashMap, sync::Arc};
 
 use lazy_static::lazy_static;
-use pdf::{backend::Backend, content::{Op, Point, TextDrawAdjusted}};
+use pdf::{backend::Backend, content::{Op, Point, TextDrawAdjusted}, file::{NoCache, NoLog}};
 use regex::Regex;
 use time::Date;
 
@@ -28,7 +28,7 @@ pub enum Error {
 }
 
 impl DataSet {
-    pub fn extract<B: Backend>(doc: &pdf::file::File<B>) -> Result<Self, Error> {
+    pub fn extract<B: Backend>(doc: &pdf::file::File<B, NoCache, NoCache, NoLog>) -> Result<Self, Error> {
         let mut doc_iter = DocumentIterator::new(doc).peekable();
 
         let mut columns: Vec<(Arc<String>, f32)> = Vec::new();
@@ -113,13 +113,13 @@ struct DocumentIterator<'a> {
 }
 
 impl<'a> DocumentIterator<'a> {
-    fn new<B: Backend>(document: &'a pdf::file::File<B>) -> Self {
+    fn new<B: Backend>(document: &'a pdf::file::File<B, NoCache, NoCache, NoLog>) -> Self {
         Self {
             point: Point { x: 0.0, y: 0.0 },
             operations: Box::new(
                 document
                     .pages()
-                    .filter_map(|page| Some(page.ok()?.contents.clone()?.operations(document).ok()?.into_iter()))
+                    .filter_map(|page| Some(page.ok()?.contents.clone()?.operations(&document.resolver()).ok()?.into_iter()))
                     .flatten(),
             ),
         }
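The filter_map change is the other half of the pdf 0.9 migration: Content::operations now takes a resolver obtained from File::resolver rather than the File itself. A standalone sketch of that pattern, built only from calls visible in this diff; the function name and path parameter are illustrative:

// Illustrative sketch of the pdf 0.9 resolver pattern used above.
use pdf::file::FileOptions;

fn count_ops(path: &std::path::Path) -> Result<usize, pdf::error::PdfError> {
    let doc = FileOptions::uncached().open(path)?;
    // resolver() replaces passing &File directly into operations().
    let resolver = doc.resolver();
    let mut total = 0;
    for page in doc.pages() {
        let page = page?;
        if let Some(contents) = &page.contents {
            total += contents.operations(&resolver)?.len();
        }
    }
    Ok(total)
}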
@@ -6,6 +6,7 @@ use std::{
 use futures::{sink::SinkExt, TryStreamExt};
 use lazy_static::lazy_static;
 use log::{info, warn};
+use pdf::file::{NoCache, NoLog};
 use reqwest::Url;
 use scraper::Selector;
 use time::PrimitiveDateTime;
@@ -13,7 +14,7 @@ use tokio_util::codec;
 
 lazy_static! {
     static ref CHARTS_URL: Url = Url::parse("https://www.mwra.com/biobot/biobotdata.htm").unwrap();
-    static ref PDF_SEL: Selector = Selector::parse(r#"a[href^="/media/file/mwradata"][href$="-data"]"#).unwrap();
+    static ref PDF_SEL: Selector = Selector::parse(r#"a[href^="/media/file/mwradata"][href$="-datapdf"]"#).unwrap();
     static ref MIN_CHECK_INTERVAL: Duration = Duration::from_secs(300);
 }
 
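The selector's trailing match changes from -data to -datapdf, tracking renamed download links on the MWRA charts page; the CSS attribute-selector mechanics are unchanged. A hedged sketch of applying such a selector with scraper 0.24 (the function name and HTML input are illustrative):

// Illustrative only: collects hrefs the way PDF_SEL is meant to match them.
use scraper::{Html, Selector};

fn pdf_hrefs(html: &str) -> Vec<String> {
    let sel = Selector::parse(r#"a[href^="/media/file/mwradata"][href$="-datapdf"]"#).unwrap();
    Html::parse_document(html)
        .select(&sel)
        .filter_map(|a| a.value().attr("href").map(str::to_owned))
        .collect()
}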
@@ -54,7 +55,7 @@ impl PdfFetcher {
         })
     }
 
-    pub async fn fetch(&mut self) -> Result<pdf::file::File<Vec<u8>>, Error> {
+    pub async fn fetch(&mut self) -> Result<pdf::file::File<Vec<u8>, NoCache, NoCache, NoLog>, Error> {
         info!("Fetching data PDF");
 
         let cache_modtime = match tokio::fs::File::open(&self.cached_pdf_path).await {
@@ -145,7 +146,7 @@
         self.cached_pdf()
     }
 
-    fn cached_pdf(&self) -> Result<pdf::file::File<Vec<u8>>, Error> {
-        Ok(pdf::file::File::open(&self.cached_pdf_path)?)
+    fn cached_pdf(&self) -> Result<pdf::file::File<Vec<u8>, NoCache, NoCache, NoLog>, Error> {
+        Ok(pdf::file::FileOptions::uncached().open(&self.cached_pdf_path)?)
     }
 }
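pdf 0.9 replaces the old File::open convenience with the FileOptions builder, and the builder's cache choice shows up in the type it returns, which is why cached_pdf and fetch grow the same NoCache/NoLog parameters. A minimal sketch of just that call, under the same assumption:

// Minimal sketch: FileOptions::uncached() selects NoCache/NoLog, and that
// choice surfaces in the File type its open() returns.
use pdf::file::{File, FileOptions, NoCache, NoLog};

fn open_uncached(path: &std::path::Path)
    -> Result<File<Vec<u8>, NoCache, NoCache, NoLog>, pdf::error::PdfError> {
    FileOptions::uncached().open(path)
}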