Compare commits
2 commits
d2eacd6a12
...
2c7484b538
Author | SHA1 | Date | |
---|---|---|---|
2c7484b538 | |||
fda5f2bbba |
5 changed files with 1298 additions and 910 deletions
2151
Cargo.lock
generated
2151
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
10
Cargo.toml
10
Cargo.toml
|
@ -14,12 +14,12 @@ bytes = "1.1"
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
lazy_static = "1.4"
|
lazy_static = "1.4"
|
||||||
log = "0.4.16"
|
log = "0.4.16"
|
||||||
pdf = "0.8"
|
pdf = "0.9"
|
||||||
regex = "1.5.5"
|
regex = "1.5.5"
|
||||||
reqwest = { version = "0.11", features = ["rustls-tls", "stream"], default-features = false }
|
reqwest = { version = "0.12.23", features = ["rustls-tls", "stream"], default-features = false }
|
||||||
scraper = "0.12"
|
scraper = "0.24"
|
||||||
simple_logger = { version = "4.2", features = ["stderr"] }
|
simple_logger = { version = "5", features = ["stderr"] }
|
||||||
thiserror = "1"
|
thiserror = "2"
|
||||||
time = { version = "0.3.9", features = ["formatting", "macros", "parsing"] }
|
time = { version = "0.3.9", features = ["formatting", "macros", "parsing"] }
|
||||||
tokio = { version = "1", features = ["full"] }
|
tokio = { version = "1", features = ["full"] }
|
||||||
tokio-stream = "0.1.8"
|
tokio-stream = "0.1.8"
|
||||||
|
|
30
flake.lock
generated
30
flake.lock
generated
|
@ -6,11 +6,11 @@
|
||||||
"rust-analyzer-src": "rust-analyzer-src"
|
"rust-analyzer-src": "rust-analyzer-src"
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1721975407,
|
"lastModified": 1758695884,
|
||||||
"narHash": "sha256-XkNqglPxkfWOkysN5C1aZeoHCozwyRq3nC4jL0IFqlA=",
|
"narHash": "sha256-rnHjtBRkcwRkrUZxg0RqN1qWTG+QC/gj4vn9uzEkBww=",
|
||||||
"owner": "nix-community",
|
"owner": "nix-community",
|
||||||
"repo": "fenix",
|
"repo": "fenix",
|
||||||
"rev": "27128b6e467ced6142264d02a884fde45931e708",
|
"rev": "9cdb79384d02234fb2868eba6c7d390253ef6f83",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -21,11 +21,11 @@
|
||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1721743106,
|
"lastModified": 1758427187,
|
||||||
"narHash": "sha256-adRZhFpBTnHiK3XIELA3IBaApz70HwCYfv7xNrHjebA=",
|
"narHash": "sha256-pHpxZ/IyCwoTQPtFIAG2QaxuSm8jWzrzBGjwQZIttJc=",
|
||||||
"owner": "nixos",
|
"owner": "nixos",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "dc14ed91132ee3a26255d01d8fd0c1f5bff27b2f",
|
"rev": "554be6495561ff07b6c724047bdd7e0716aa7b46",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -37,11 +37,11 @@
|
||||||
},
|
},
|
||||||
"nixpkgs_2": {
|
"nixpkgs_2": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1721924956,
|
"lastModified": 1758427187,
|
||||||
"narHash": "sha256-Sb1jlyRO+N8jBXEX9Pg9Z1Qb8Bw9QyOgLDNMEpmjZ2M=",
|
"narHash": "sha256-pHpxZ/IyCwoTQPtFIAG2QaxuSm8jWzrzBGjwQZIttJc=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "5ad6a14c6bf098e98800b091668718c336effc95",
|
"rev": "554be6495561ff07b6c724047bdd7e0716aa7b46",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -61,11 +61,11 @@
|
||||||
"rust-analyzer-src": {
|
"rust-analyzer-src": {
|
||||||
"flake": false,
|
"flake": false,
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1721909430,
|
"lastModified": 1758620797,
|
||||||
"narHash": "sha256-u3e38jvjbxbbYH3caQFPE7gnNFNBjbkqkHRKBx1AOJs=",
|
"narHash": "sha256-Ly4rHgrixFMBnkbMursVt74mxnntnE6yVdF5QellJ+A=",
|
||||||
"owner": "rust-lang",
|
"owner": "rust-lang",
|
||||||
"repo": "rust-analyzer",
|
"repo": "rust-analyzer",
|
||||||
"rev": "c02a4a31eada45e591b5edb8c1a813ea7b9d408f",
|
"rev": "905641f3520230ad6ef421bcf5da9c6b49f2479b",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -95,11 +95,11 @@
|
||||||
"systems": "systems"
|
"systems": "systems"
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1710146030,
|
"lastModified": 1731533236,
|
||||||
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
|
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||||
"owner": "numtide",
|
"owner": "numtide",
|
||||||
"repo": "flake-utils",
|
"repo": "flake-utils",
|
||||||
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
|
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use std::{collections::HashMap, sync::Arc};
|
use std::{collections::HashMap, sync::Arc};
|
||||||
|
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use pdf::{backend::Backend, content::{Op, Point, TextDrawAdjusted}};
|
use pdf::{backend::Backend, content::{Op, Point, TextDrawAdjusted}, file::{NoCache, NoLog}};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use time::Date;
|
use time::Date;
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ pub enum Error {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DataSet {
|
impl DataSet {
|
||||||
pub fn extract<B: Backend>(doc: &pdf::file::File<B>) -> Result<Self, Error> {
|
pub fn extract<B: Backend>(doc: &pdf::file::File<B, NoCache, NoCache, NoLog>) -> Result<Self, Error> {
|
||||||
let mut doc_iter = DocumentIterator::new(doc).peekable();
|
let mut doc_iter = DocumentIterator::new(doc).peekable();
|
||||||
|
|
||||||
let mut columns: Vec<(Arc<String>, f32)> = Vec::new();
|
let mut columns: Vec<(Arc<String>, f32)> = Vec::new();
|
||||||
|
@ -113,13 +113,13 @@ struct DocumentIterator<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> DocumentIterator<'a> {
|
impl<'a> DocumentIterator<'a> {
|
||||||
fn new<B: Backend>(document: &'a pdf::file::File<B>) -> Self {
|
fn new<B: Backend>(document: &'a pdf::file::File<B, NoCache, NoCache, NoLog>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
point: Point { x: 0.0, y: 0.0 },
|
point: Point { x: 0.0, y: 0.0 },
|
||||||
operations: Box::new(
|
operations: Box::new(
|
||||||
document
|
document
|
||||||
.pages()
|
.pages()
|
||||||
.filter_map(|page| Some(page.ok()?.contents.clone()?.operations(document).ok()?.into_iter()))
|
.filter_map(|page| Some(page.ok()?.contents.clone()?.operations(&document.resolver()).ok()?.into_iter()))
|
||||||
.flatten(),
|
.flatten(),
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ use std::{
|
||||||
use futures::{sink::SinkExt, TryStreamExt};
|
use futures::{sink::SinkExt, TryStreamExt};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use log::{info, warn};
|
use log::{info, warn};
|
||||||
|
use pdf::file::{NoCache, NoLog};
|
||||||
use reqwest::Url;
|
use reqwest::Url;
|
||||||
use scraper::Selector;
|
use scraper::Selector;
|
||||||
use time::PrimitiveDateTime;
|
use time::PrimitiveDateTime;
|
||||||
|
@ -13,7 +14,7 @@ use tokio_util::codec;
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref CHARTS_URL: Url = Url::parse("https://www.mwra.com/biobot/biobotdata.htm").unwrap();
|
static ref CHARTS_URL: Url = Url::parse("https://www.mwra.com/biobot/biobotdata.htm").unwrap();
|
||||||
static ref PDF_SEL: Selector = Selector::parse(r#"a[href^="/media/file/mwradata"][href$="-data"]"#).unwrap();
|
static ref PDF_SEL: Selector = Selector::parse(r#"a[href^="/media/file/mwradata"][href$="-datapdf"]"#).unwrap();
|
||||||
static ref MIN_CHECK_INTERVAL: Duration = Duration::from_secs(300);
|
static ref MIN_CHECK_INTERVAL: Duration = Duration::from_secs(300);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,7 +55,7 @@ impl PdfFetcher {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn fetch(&mut self) -> Result<pdf::file::File<Vec<u8>>, Error> {
|
pub async fn fetch(&mut self) -> Result<pdf::file::File<Vec<u8>, NoCache, NoCache, NoLog>, Error> {
|
||||||
info!("Fetching data PDF");
|
info!("Fetching data PDF");
|
||||||
|
|
||||||
let cache_modtime = match tokio::fs::File::open(&self.cached_pdf_path).await {
|
let cache_modtime = match tokio::fs::File::open(&self.cached_pdf_path).await {
|
||||||
|
@ -145,7 +146,7 @@ impl PdfFetcher {
|
||||||
self.cached_pdf()
|
self.cached_pdf()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cached_pdf(&self) -> Result<pdf::file::File<Vec<u8>>, Error> {
|
fn cached_pdf(&self) -> Result<pdf::file::File<Vec<u8>, NoCache, NoCache, NoLog>, Error> {
|
||||||
Ok(pdf::file::File::open(&self.cached_pdf_path)?)
|
Ok(pdf::file::FileOptions::uncached().open(&self.cached_pdf_path)?)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue