simple web server

parent 50a25c494a
commit 921b62ed97

5 changed files with 624 additions and 40 deletions
src/extract.rs
@@ -1,6 +1,4 @@
-use std::collections::HashMap;
-use std::fmt::Write;
-use std::rc::Rc;
+use std::{collections::HashMap, fmt::Write, sync::Arc};
 
 use lazy_static::lazy_static;
 use pdf::{backend::Backend, content::Operation, primitive::Primitive};
@@ -21,15 +19,21 @@ const DATE_DISPLAY_FORMAT: &[time::format_description::FormatItem] =
     time::macros::format_description!("[year]-[month]-[day]");
 
 pub struct DataSet {
-    pub columns: Vec<Rc<String>>,
+    pub columns: Vec<Arc<String>>,
     pub rows: Vec<DataPoint>,
 }
 
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error("PDF contained no data rows")]
+    NoData,
+}
+
 impl DataSet {
-    pub fn extract<B: Backend>(doc: &pdf::file::File<B>) -> Option<Self> {
+    pub fn extract<B: Backend>(doc: &pdf::file::File<B>) -> Result<Self, Error> {
         let mut doc_iter = DocumentIterator::new(doc).peekable();
 
-        let mut columns: Vec<(Rc<String>, f32)> = Vec::new();
+        let mut columns: Vec<(Arc<String>, f32)> = Vec::new();
         let mut rows: Vec<DataPoint> = Vec::new();
 
         let (mut current_datapoint, mut current_y) = loop {
@@ -39,7 +43,7 @@ impl DataSet {
             let column_x = text.x;
             while let Some(more) = doc_iter.peek() {
                 if is_new_column_header(&more.text) || DATE_REGEX.is_match(&more.text) {
-                    columns.push((Rc::new(column_name), column_x));
+                    columns.push((Arc::new(column_name), column_x));
                     break;
                 }
                 column_name += " ";
@@ -53,7 +57,7 @@ impl DataSet {
                     );
                 }
             } else {
-                return None;
+                return Err(Error::NoData);
             }
         };
 
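An aside on the error-handling change above: returning `Result<Self, Error>` instead of `Option<Self>` lets callers propagate failures with `?`, and the `thiserror` derive generates the `Display` and `std::error::Error` impls from the `#[error(...)]` attributes. A minimal sketch of the same pattern, with hypothetical names (not part of the commit):

use thiserror::Error;

// The derive writes the Display and std::error::Error impls for us;
// the #[error("...")] string becomes the Display output.
#[derive(Error, Debug)]
enum ExtractError {
    #[error("input contained no data rows")]
    NoData,
}

fn first_row(rows: &[&str]) -> Result<String, ExtractError> {
    // `ok_or` turns the absent case into an error value,
    // as the commit does with `return Err(Error::NoData)`.
    rows.first().map(|r| r.to_string()).ok_or(ExtractError::NoData)
}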
@@ -79,40 +83,44 @@ impl DataSet {
             }
         }
 
-        Some(Self {
+        Ok(Self {
             columns: columns.into_iter().map(|(column, _)| column).collect(),
             rows,
         })
     }
 
+    pub fn csv_header(&self) -> Result<String, std::fmt::Error> {
+        let mut header = String::from("Date");
+        for column in self.columns.iter() {
+            write!(&mut header, ",{}", column)?;
+        }
+        Ok(header)
+    }
+
+    pub fn csv_row(&self, datapoint: &DataPoint) -> Result<String, std::fmt::Error> {
+        let mut csv_row = datapoint
+            .date
+            .format(DATE_DISPLAY_FORMAT)
+            .expect("Failed to format date!");
+        for column in self.columns.iter() {
+            if let Some(val) = datapoint.values.get(column) {
+                write!(&mut csv_row, ",{}", val)?;
+            } else {
+                write!(&mut csv_row, ",")?;
+            }
+        }
+        Ok(csv_row)
+    }
+
     pub fn csv_rows(&self) -> impl Iterator<Item = Result<String, std::fmt::Error>> + '_ {
-        std::iter::once_with(|| {
-            let mut header = String::from("Date");
-            for column in self.columns.iter() {
-                write!(&mut header, ",{}", column)?;
-            }
-            Ok(header)
-        })
-        .chain(self.rows.iter().map(|datapoint| {
-            let mut csv_row = datapoint
-                .date
-                .format(DATE_DISPLAY_FORMAT)
-                .expect("Failed to format date!");
-            for column in self.columns.iter() {
-                if let Some(val) = datapoint.values.get(column) {
-                    write!(&mut csv_row, ",{}", val)?;
-                } else {
-                    write!(&mut csv_row, ",")?;
-                }
-            }
-            Ok(csv_row)
-        }))
+        std::iter::once_with(|| self.csv_header())
+            .chain(self.rows.iter().map(|datapoint| self.csv_row(datapoint)))
     }
 }
 
 pub struct DataPoint {
     pub date: Date,
-    pub values: HashMap<Rc<String>, u32>,
+    pub values: HashMap<Arc<String>, u32>,
 }
 
 impl DataPoint {
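A note on the recurring `Rc` → `Arc` swap in this file: the dataset is about to be shared across actix-web worker threads, and `Rc<String>` is neither `Send` nor `Sync`, so it cannot cross a thread boundary; `Arc` uses atomic reference counts and can. A compile-time sketch of the distinction (illustrative only, not from the commit):

use std::sync::Arc;

// Compile-time probe: only Send + Sync types may be shared across threads.
fn assert_send_sync<T: Send + Sync>() {}

fn main() {
    assert_send_sync::<Arc<String>>(); // compiles: Arc is thread-safe
    // assert_send_sync::<std::rc::Rc<String>>(); // rejected: Rc is !Send + !Sync
}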
src/main.rs (117 changed lines)
@@ -1,15 +1,118 @@
+use std::{sync::Arc, time::Duration};
+
+use actix_web::{get, web, App, HttpResponse, HttpServer, Responder};
+use lazy_static::lazy_static;
+use tokio::sync::RwLock;
+
 mod extract;
 mod fetch;
 
 use extract::DataSet;
 use fetch::PdfFetcher;
 
-#[tokio::main]
-async fn main() {
-    let mut fetcher = PdfFetcher::new().expect("Failed to initialize PDF fetcher");
-    let doc = fetcher.fetch().await.expect("Failed to fetch PDF");
-    let dataset = DataSet::extract(&doc).expect("Failed to extract dataset");
-    for row in dataset.csv_rows() {
-        println!("{}", row.unwrap());
-    }
-}
+lazy_static! {
+    static ref UPDATE_INTERVAL: Duration = Duration::from_secs(3600);
+}
+
+struct AppState {
+    dataset: RwLock<Arc<DataSet>>,
+}
+
+#[derive(thiserror::Error, Debug)]
+enum Error {
+    #[error("Failed to fetch PDF")]
+    Fetch(#[from] fetch::Error),
+    #[error("Failed to extract data from PDF")]
+    Extract(#[from] extract::Error),
+}
+
+async fn load_data(fetcher: &mut PdfFetcher) -> Result<DataSet, Error> {
+    Ok(DataSet::extract(&fetcher.fetch().await?)?)
+}
+
+async fn try_update(state: &AppState, fetcher: &mut PdfFetcher) -> Result<(), Error> {
+    *state.dataset.write().await = Arc::new(load_data(fetcher).await?);
+    Ok(())
+}
+
+async fn start_updater() -> Result<web::Data<AppState>, Error> {
+    let mut fetcher = PdfFetcher::new()?;
+    let state = web::Data::new(AppState {
+        dataset: RwLock::new(Arc::new(load_data(&mut fetcher).await?)),
+    });
+
+    let state_copy = state.clone();
+    std::thread::spawn(move || {
+        actix_web::rt::System::new().block_on(async {
+            loop {
+                if let Err(e) = try_update(&state_copy, &mut fetcher).await {
+                    eprintln!("Error updating data: {:#?}", e);
+                }
+                actix_web::rt::time::sleep(*UPDATE_INTERVAL).await;
+            }
+        });
+    });
+
+    Ok(state)
+}
+
+#[actix_web::main]
+async fn main() -> std::io::Result<()> {
+    let state = start_updater().await.expect("Failed to initialize state");
+
+    HttpServer::new(move || {
+        App::new()
+            .app_data(state.clone())
+            .service(csv)
+            .service(actix_files::Files::new("/", "./static/").index_file("index.html"))
+    })
+    .bind("127.0.0.1:8080")?
+    .run()
+    .await
+}
+
+struct DataIterator {
+    dataset: Arc<DataSet>,
+    index: Option<usize>,
+}
+
+impl DataIterator {
+    fn new(dataset: Arc<DataSet>) -> Self {
+        Self {
+            dataset,
+            index: None,
+        }
+    }
+}
+
+impl Iterator for DataIterator {
+    type Item = Result<String, std::fmt::Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.index {
+            None => {
+                self.index = Some(0);
+                Some(self.dataset.csv_header().map(|s| s + "\n"))
+            }
+            Some(i) => {
+                if let Some(row) = self.dataset.rows.get(i) {
+                    self.index = Some(i + 1);
+                    Some(self.dataset.csv_row(row).map(|s| s + "\n"))
+                } else {
+                    None
+                }
+            }
+        }
+    }
+}
+
+#[get("/data.csv")]
+async fn csv(data: web::Data<AppState>) -> impl Responder {
+    let dataset = { data.dataset.read().await.clone() };
+
+    let rows =
+        tokio_stream::iter(DataIterator::new(dataset).map(|item| item.map(bytes::Bytes::from)));
+    HttpResponse::Ok()
+        .content_type("text/csv; charset=utf-8")
+        .body(actix_web::body::BodyStream::new(rows))
+}
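Since `DataSet`'s fields are public, the `/data.csv` handler above can be exercised end to end with actix-web's test utilities. A hedged sketch, assuming actix-web 4 (`#[actix_web::test]` and `test::call_and_read_body` come from that release); the test name and empty-dataset setup are illustrative, not part of the commit:

#[cfg(test)]
mod tests {
    use super::*;
    use actix_web::test;

    #[actix_web::test]
    async fn data_csv_serves_header_line() {
        // An empty dataset should stream just the "Date" header plus newline.
        let state = web::Data::new(AppState {
            dataset: RwLock::new(Arc::new(DataSet {
                columns: Vec::new(),
                rows: Vec::new(),
            })),
        });
        let app = test::init_service(App::new().app_data(state).service(csv)).await;
        let req = test::TestRequest::get().uri("/data.csv").to_request();
        let body = test::call_and_read_body(&app, req).await;
        assert_eq!(body, bytes::Bytes::from("Date\n"));
    }
}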