Compare commits

..

3 commits

Author SHA1 Message Date
xenofem 774f3195de don't immediately run fetcher a second time on startup 2022-04-16 00:04:41 -04:00
xenofem 1f7e5ee5ca add result/ to .gitignore 2022-04-16 00:03:50 -04:00
xenofem 7a3fd3f2ca add basic logging 2022-04-16 00:03:09 -04:00
5 changed files with 57 additions and 3 deletions

3
.gitignore vendored
View file

@ -1,2 +1,3 @@
/target /target
data.pdf /data.pdf
/result

37
Cargo.lock generated
View file

@ -288,6 +288,17 @@ version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341" checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341"
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.1.0" version = "1.1.0"
@ -425,6 +436,17 @@ dependencies = [
"generic-array", "generic-array",
] ]
[[package]]
name = "colored"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd"
dependencies = [
"atty",
"lazy_static",
"winapi",
]
[[package]] [[package]]
name = "convert_case" name = "convert_case"
version = "0.4.0" version = "0.4.0"
@ -1389,10 +1411,12 @@ dependencies = [
"futures", "futures",
"json", "json",
"lazy_static", "lazy_static",
"log",
"pdf", "pdf",
"regex", "regex",
"reqwest", "reqwest",
"scraper", "scraper",
"simple_logger",
"thiserror", "thiserror",
"time 0.3.9", "time 0.3.9",
"tokio", "tokio",
@ -1764,6 +1788,19 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "simple_logger"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c75a9723083573ace81ad0cdfc50b858aa3c366c48636edb4109d73122a0c0ea"
dependencies = [
"atty",
"colored",
"log",
"time 0.3.9",
"winapi",
]
[[package]] [[package]]
name = "siphasher" name = "siphasher"
version = "0.3.10" version = "0.3.10"

2
Cargo.toml
View file

@ -14,10 +14,12 @@ bytes = "1.1"
futures = "0.3" futures = "0.3"
json = "0.12.4" json = "0.12.4"
lazy_static = "1.4" lazy_static = "1.4"
log = "0.4.16"
pdf = "0.7.2" pdf = "0.7.2"
regex = "1.5.5" regex = "1.5.5"
reqwest = { version = "0.11", features = ["rustls-tls", "stream"], default-features = false } reqwest = { version = "0.11", features = ["rustls-tls", "stream"], default-features = false }
scraper = "0.12" scraper = "0.12"
simple_logger = { version = "2.1.0", features = ["stderr"] }
thiserror = "1" thiserror = "1"
time = { version = "0.3.9", features = ["formatting", "macros", "parsing"] } time = { version = "0.3.9", features = ["formatting", "macros", "parsing"] }
tokio = { version = "1", features = ["full"] } tokio = { version = "1", features = ["full"] }

View file

@ -2,6 +2,7 @@ use std::time::{Duration, Instant, SystemTime};
use futures::{sink::SinkExt, TryStreamExt}; use futures::{sink::SinkExt, TryStreamExt};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use log::info;
use reqwest::Url; use reqwest::Url;
use scraper::Selector; use scraper::Selector;
use time::PrimitiveDateTime; use time::PrimitiveDateTime;
@ -50,6 +51,8 @@ impl PdfFetcher {
} }
pub async fn fetch(&mut self) -> Result<pdf::file::File<Vec<u8>>, Error> { pub async fn fetch(&mut self) -> Result<pdf::file::File<Vec<u8>>, Error> {
info!("Fetching data PDF");
let cache_modtime = match tokio::fs::File::open(CACHED_PDF_PATH).await { let cache_modtime = match tokio::fs::File::open(CACHED_PDF_PATH).await {
Ok(file) => Some( Ok(file) => Some(
file.metadata() file.metadata()
@ -65,6 +68,7 @@ impl PdfFetcher {
if let Some(instant) = self.last_checked { if let Some(instant) = self.last_checked {
if now - instant < *MIN_CHECK_INTERVAL { if now - instant < *MIN_CHECK_INTERVAL {
return if cache_modtime.is_some() { return if cache_modtime.is_some() {
info!("Already checked origin recently, not rechecking");
self.cached_pdf() self.cached_pdf()
} else { } else {
Err(Error::TooSoon) Err(Error::TooSoon)
@ -111,6 +115,8 @@ impl PdfFetcher {
.map_or(true, |(cache, origin)| origin > cache); .map_or(true, |(cache, origin)| origin > cache);
if outdated { if outdated {
info!("Cached PDF is outdated, downloading latest version");
let mut pdf_stream = self let mut pdf_stream = self
.client .client
.get(pdf_url) .get(pdf_url)
@ -124,6 +130,8 @@ impl PdfFetcher {
sink.send_all(&mut pdf_stream).await?; sink.send_all(&mut pdf_stream).await?;
<dyn futures::Sink<bytes::Bytes, Error = std::io::Error> + Unpin>::close(&mut sink) <dyn futures::Sink<bytes::Bytes, Error = std::io::Error> + Unpin>::close(&mut sink)
.await?; .await?;
} else {
info!("Cached PDF is already up to date");
} }
self.last_checked = Some(now); self.last_checked = Some(now);
self.cached_pdf() self.cached_pdf()

View file

@ -1,6 +1,9 @@
use std::{sync::Arc, time::Duration}; use std::{sync::Arc, time::Duration};
use actix_web::{get, http::header::ContentType, web, App, HttpResponse, HttpServer, Responder}; use actix_web::{
get, http::header::ContentType, middleware::Logger, web, App, HttpResponse, HttpServer,
Responder,
};
use lazy_static::lazy_static; use lazy_static::lazy_static;
use tokio::sync::RwLock; use tokio::sync::RwLock;
@ -47,10 +50,10 @@ async fn start_updater() -> Result<web::Data<AppState>, Error> {
std::thread::spawn(move || { std::thread::spawn(move || {
actix_web::rt::System::new().block_on(async { actix_web::rt::System::new().block_on(async {
loop { loop {
actix_web::rt::time::sleep(*UPDATE_INTERVAL).await;
if let Err(e) = try_update(&state_copy, &mut fetcher).await { if let Err(e) = try_update(&state_copy, &mut fetcher).await {
eprintln!("Error updating data: {:#?}", e); eprintln!("Error updating data: {:#?}", e);
} }
actix_web::rt::time::sleep(*UPDATE_INTERVAL).await;
} }
}); });
}); });
@ -60,11 +63,14 @@ async fn start_updater() -> Result<web::Data<AppState>, Error> {
#[actix_web::main] #[actix_web::main]
async fn main() -> std::io::Result<()> { async fn main() -> std::io::Result<()> {
simple_logger::init_with_level(log::Level::Info).unwrap();
let state = start_updater().await.expect("Failed to initialize state"); let state = start_updater().await.expect("Failed to initialize state");
HttpServer::new(move || { HttpServer::new(move || {
App::new() App::new()
.app_data(state.clone()) .app_data(state.clone())
.wrap(Logger::default())
.service(csv) .service(csv)
.service(json) .service(json)
.service(actix_files::Files::new("/", "./static/").index_file("index.html")) .service(actix_files::Files::new("/", "./static/").index_file("index.html"))