simple web server

This commit is contained in:
xenofem 2022-04-06 18:46:17 -04:00
parent 50a25c494a
commit 921b62ed97
5 changed files with 624 additions and 40 deletions

View file

@ -1,6 +1,4 @@
use std::collections::HashMap;
use std::fmt::Write;
use std::rc::Rc;
use std::{collections::HashMap, fmt::Write, sync::Arc};
use lazy_static::lazy_static;
use pdf::{backend::Backend, content::Operation, primitive::Primitive};
@ -21,15 +19,21 @@ const DATE_DISPLAY_FORMAT: &[time::format_description::FormatItem] =
time::macros::format_description!("[year]-[month]-[day]");
pub struct DataSet {
pub columns: Vec<Rc<String>>,
pub columns: Vec<Arc<String>>,
pub rows: Vec<DataPoint>,
}
/// Errors produced while extracting a dataset from a parsed PDF.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// The document iterator ran out before any data rows were found.
#[error("PDF contained no data rows")]
NoData,
}
impl DataSet {
pub fn extract<B: Backend>(doc: &pdf::file::File<B>) -> Option<Self> {
pub fn extract<B: Backend>(doc: &pdf::file::File<B>) -> Result<Self, Error> {
let mut doc_iter = DocumentIterator::new(doc).peekable();
let mut columns: Vec<(Rc<String>, f32)> = Vec::new();
let mut columns: Vec<(Arc<String>, f32)> = Vec::new();
let mut rows: Vec<DataPoint> = Vec::new();
let (mut current_datapoint, mut current_y) = loop {
@ -39,7 +43,7 @@ impl DataSet {
let column_x = text.x;
while let Some(more) = doc_iter.peek() {
if is_new_column_header(&more.text) || DATE_REGEX.is_match(&more.text) {
columns.push((Rc::new(column_name), column_x));
columns.push((Arc::new(column_name), column_x));
break;
}
column_name += " ";
@ -53,7 +57,7 @@ impl DataSet {
);
}
} else {
return None;
return Err(Error::NoData);
}
};
@ -79,40 +83,44 @@ impl DataSet {
}
}
Some(Self {
Ok(Self {
columns: columns.into_iter().map(|(column, _)| column).collect(),
rows,
})
}
/// Build the CSV header line: the literal "Date" followed by each column
/// name, comma-separated (no trailing newline).
pub fn csv_header(&self) -> Result<String, std::fmt::Error> {
    let mut header = String::from("Date");
    self.columns
        .iter()
        .try_for_each(|column| write!(&mut header, ",{}", column))?;
    Ok(header)
}
/// Render one data point as a CSV line: the formatted date, then one field
/// per known column in column order — empty when the row has no value for
/// that column (no trailing newline).
pub fn csv_row(&self, datapoint: &DataPoint) -> Result<String, std::fmt::Error> {
    let mut line = datapoint
        .date
        .format(DATE_DISPLAY_FORMAT)
        .expect("Failed to format date!");
    for column in self.columns.iter() {
        match datapoint.values.get(column) {
            Some(val) => write!(&mut line, ",{}", val)?,
            None => write!(&mut line, ",")?,
        }
    }
    Ok(line)
}
pub fn csv_rows(&self) -> impl Iterator<Item = Result<String, std::fmt::Error>> + '_ {
std::iter::once_with(|| {
let mut header = String::from("Date");
for column in self.columns.iter() {
write!(&mut header, ",{}", column)?;
}
Ok(header)
})
.chain(self.rows.iter().map(|datapoint| {
let mut csv_row = datapoint
.date
.format(DATE_DISPLAY_FORMAT)
.expect("Failed to format date!");
for column in self.columns.iter() {
if let Some(val) = datapoint.values.get(column) {
write!(&mut csv_row, ",{}", val)?;
} else {
write!(&mut csv_row, ",")?;
}
}
Ok(csv_row)
}))
std::iter::once_with(|| self.csv_header())
.chain(self.rows.iter().map(|datapoint| self.csv_row(datapoint)))
}
}
pub struct DataPoint {
pub date: Date,
pub values: HashMap<Rc<String>, u32>,
pub values: HashMap<Arc<String>, u32>,
}
impl DataPoint {

View file

@ -1,15 +1,118 @@
use std::{sync::Arc, time::Duration};
use actix_web::{get, web, App, HttpResponse, HttpServer, Responder};
use lazy_static::lazy_static;
use tokio::sync::RwLock;
mod extract;
mod fetch;
use extract::DataSet;
use fetch::PdfFetcher;
#[tokio::main]
async fn main() {
let mut fetcher = PdfFetcher::new().expect("Failed to initialize PDF fetcher");
let doc = fetcher.fetch().await.expect("Failed to fetch PDF");
let dataset = DataSet::extract(&doc).expect("Failed to extract dataset");
for row in dataset.csv_rows() {
println!("{}", row.unwrap());
lazy_static! {
static ref UPDATE_INTERVAL: Duration = Duration::from_secs(3600);
}
/// Shared server state: the most recently extracted dataset. The RwLock lets
/// the background updater swap in a fresh Arc while request handlers clone
/// the current one cheaply.
struct AppState {
dataset: RwLock<Arc<DataSet>>,
}
/// Top-level data-loading errors: either fetching the PDF or extracting
/// rows from it failed.
#[derive(thiserror::Error, Debug)]
enum Error {
#[error("Failed to fetch PDF")]
Fetch(#[from] fetch::Error),
#[error("Failed to extract data from PDF")]
Extract(#[from] extract::Error),
}
/// Fetch the source PDF and extract a fresh `DataSet` from it.
async fn load_data(fetcher: &mut PdfFetcher) -> Result<DataSet, Error> {
    let doc = fetcher.fetch().await?;
    let dataset = DataSet::extract(&doc)?;
    Ok(dataset)
}
/// Replace the shared dataset with freshly loaded data. On error the old
/// dataset is left untouched.
async fn try_update(state: &AppState, fetcher: &mut PdfFetcher) -> Result<(), Error> {
    let fresh = Arc::new(load_data(fetcher).await?);
    let mut dataset = state.dataset.write().await;
    *dataset = fresh;
    Ok(())
}
/// Build the shared application state and spawn a background refresher.
///
/// The dataset is loaded once up front, so the server never starts without
/// data; after that, a dedicated OS thread re-fetches and re-extracts it
/// every UPDATE_INTERVAL, logging (but surviving) failures.
async fn start_updater() -> Result<web::Data<AppState>, Error> {
let mut fetcher = PdfFetcher::new()?;
// Initial load: a failure here propagates and aborts startup.
let state = web::Data::new(AppState {
dataset: RwLock::new(Arc::new(load_data(&mut fetcher).await?)),
});
let state_copy = state.clone();
// NOTE(review): a separate thread running its own actix System is used
// rather than spawning a task on the server's runtime — presumably so the
// loop can own `fetcher` independently of the worker threads; confirm
// before simplifying to actix_web::rt::spawn.
std::thread::spawn(move || {
actix_web::rt::System::new().block_on(async {
loop {
// On failure, keep serving the previous (stale) dataset.
if let Err(e) = try_update(&state_copy, &mut fetcher).await {
eprintln!("Error updating data: {:#?}", e);
}
actix_web::rt::time::sleep(*UPDATE_INTERVAL).await;
}
});
});
Ok(state)
}
/// Entry point: initialize shared state (fatal on failure), then serve the
/// CSV endpoint plus static files on 127.0.0.1:8080.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
    let state = start_updater().await.expect("Failed to initialize state");
    let server = HttpServer::new(move || {
        App::new()
            .app_data(state.clone())
            .service(csv)
            .service(actix_files::Files::new("/", "./static/").index_file("index.html"))
    });
    server.bind("127.0.0.1:8080")?.run().await
}
/// Iterator over a dataset's CSV lines: header first, then one line per row.
struct DataIterator {
dataset: Arc<DataSet>,
// None = header not yet emitted; Some(i) = next row to emit is rows[i].
index: Option<usize>,
}
impl DataIterator {
/// Create an iterator positioned before the CSV header line.
fn new(dataset: Arc<DataSet>) -> Self {
Self {
dataset,
index: None,
}
}
}
impl Iterator for DataIterator {
    type Item = Result<String, std::fmt::Error>;

    /// Yield the header on the first call, then each row in order, each with
    /// a trailing newline appended; formatting errors are passed through.
    fn next(&mut self) -> Option<Self::Item> {
        let line = match self.index {
            None => {
                self.index = Some(0);
                self.dataset.csv_header()
            }
            Some(i) => {
                let row = self.dataset.rows.get(i)?;
                self.index = Some(i + 1);
                self.dataset.csv_row(row)
            }
        };
        Some(line.map(|s| s + "\n"))
    }
}
#[get("/data.csv")]
async fn csv(data: web::Data<AppState>) -> impl Responder {
let dataset = { data.dataset.read().await.clone() };
let rows =
tokio_stream::iter(DataIterator::new(dataset).map(|item| item.map(bytes::Bytes::from)));
HttpResponse::Ok()
.content_type("text/csv; charset=utf-8")
.body(actix_web::body::BodyStream::new(rows))
}