From aa508a43cb6e33911c1c0a3ec979b5f2b40d79f4 Mon Sep 17 00:00:00 2001
From: xenofem
Date: Thu, 7 Apr 2022 01:09:45 -0400
Subject: [PATCH] refactor serialization, add json

---
 Cargo.lock       |   7 +++
 Cargo.toml       |   1 +
 src/extract.rs   |  32 +-----
 src/main.rs      |  57 +++++--------
 src/serialize.rs | 127 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 155 insertions(+), 69 deletions(-)
 create mode 100644 src/serialize.rs

diff --git a/Cargo.lock b/Cargo.lock
index 61e119d..5b46e70 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -995,6 +995,12 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "json"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "078e285eafdfb6c4b434e0d31e8cfcb5115b651496faca5749b88fafd4f23bfd"
+
 [[package]]
 name = "language-tags"
 version = "0.3.2"
@@ -1381,6 +1387,7 @@ dependencies = [
  "actix-web",
  "bytes",
  "futures",
+ "json",
  "lazy_static",
  "pdf",
  "regex",
diff --git a/Cargo.toml b/Cargo.toml
index bddf24e..069fdce 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,6 +11,7 @@ actix-files = "0.6.0"
 actix-web = "4.0.1"
 bytes = "1.1"
 futures = "0.3"
+json = "0.12.4"
 lazy_static = "1.4"
 pdf = "0.7.2"
 regex = "1.5.5"
diff --git a/src/extract.rs b/src/extract.rs
index 1ca18ec..7890738 100644
--- a/src/extract.rs
+++ b/src/extract.rs
@@ -1,4 +1,4 @@
-use std::{collections::HashMap, fmt::Write, sync::Arc};
+use std::{collections::HashMap, sync::Arc};
 
 use lazy_static::lazy_static;
 use pdf::{backend::Backend, content::Operation, primitive::Primitive};
@@ -15,8 +15,6 @@ lazy_static! {
 
 const DATE_PARSE_FORMAT: &[time::format_description::FormatItem<'static>] =
     time::macros::format_description!("[month padding:none]/[day padding:none]/[year]");
-const DATE_DISPLAY_FORMAT: &[time::format_description::FormatItem<'static>] =
-    time::macros::format_description!("[year]-[month]-[day]");
 
 pub struct DataSet {
     pub columns: Vec<Arc<String>>,
@@ -88,34 +86,6 @@ impl DataSet {
             rows,
         })
     }
-
-    pub fn csv_header(&self) -> Result<String, std::fmt::Error> {
-        let mut header = String::from("Date");
-        for column in self.columns.iter() {
-            write!(&mut header, ",{}", column)?;
-        }
-        Ok(header)
-    }
-
-    pub fn csv_row(&self, datapoint: &DataPoint) -> Result<String, std::fmt::Error> {
-        let mut csv_row = datapoint
-            .date
-            .format(DATE_DISPLAY_FORMAT)
-            .expect("Failed to format date!");
-        for column in self.columns.iter() {
-            if let Some(val) = datapoint.values.get(column) {
-                write!(&mut csv_row, ",{}", val)?;
-            } else {
-                write!(&mut csv_row, ",")?;
-            }
-        }
-        Ok(csv_row)
-    }
-
-    pub fn csv_rows(&self) -> impl Iterator<Item = Result<String, std::fmt::Error>> + '_ {
-        std::iter::once_with(|| self.csv_header())
-            .chain(self.rows.iter().map(|datapoint| self.csv_row(datapoint)))
-    }
 }
 
 pub struct DataPoint {
diff --git a/src/main.rs b/src/main.rs
index cb48873..b84da58 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,14 +1,16 @@
 use std::{sync::Arc, time::Duration};
 
-use actix_web::{get, web, App, HttpResponse, HttpServer, Responder};
+use actix_web::{get, http::header::ContentType, web, App, HttpResponse, HttpServer, Responder};
 use lazy_static::lazy_static;
 use tokio::sync::RwLock;
 
 mod extract;
 mod fetch;
+mod serialize;
 
 use extract::DataSet;
 use fetch::PdfFetcher;
+use serialize::{Csv, DataSerializer, Json};
 
 lazy_static! {
     static ref UPDATE_INTERVAL: Duration = Duration::from_secs(3600);
@@ -64,6 +66,7 @@ async fn main() -> std::io::Result<()> {
         App::new()
             .app_data(state.clone())
             .service(csv)
+            .service(json)
             .service(actix_files::Files::new("/", "./static/").index_file("index.html"))
     })
     .bind("127.0.0.1:8080")?
@@ -71,48 +74,26 @@ async fn main() -> std::io::Result<()> {
     .await
 }
 
-struct DataIterator {
-    dataset: Arc<DataSet>,
-    index: Option<usize>,
-}
-
-impl DataIterator {
-    fn new(dataset: Arc<DataSet>) -> Self {
-        Self {
-            dataset,
-            index: None,
-        }
-    }
-}
-
-impl Iterator for DataIterator {
-    type Item = Result<String, std::fmt::Error>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self.index {
-            None => {
-                self.index = Some(0);
-                Some(self.dataset.csv_header().map(|s| s + "\n"))
-            }
-            Some(i) => {
-                if let Some(row) = self.dataset.rows.get(i) {
-                    self.index = Some(i + 1);
-                    Some(self.dataset.csv_row(row).map(|s| s + "\n"))
-                } else {
-                    None
-                }
-            }
-        }
-    }
-}
-
 #[get("/data.csv")]
 async fn csv(data: web::Data<AppState>) -> impl Responder {
     let dataset = { data.dataset.read().await.clone() };
 
-    let rows =
-        tokio_stream::iter(DataIterator::new(dataset).map(|item| item.map(bytes::Bytes::from)));
+    let rows = tokio_stream::iter(
+        DataSerializer::new(dataset, Csv).map(|item| item.map(bytes::Bytes::from)),
+    );
     HttpResponse::Ok()
         .content_type("text/csv; charset=utf-8")
         .body(actix_web::body::BodyStream::new(rows))
 }
+
+#[get("/data.json")]
+async fn json(data: web::Data<AppState>) -> impl Responder {
+    let dataset = { data.dataset.read().await.clone() };
+
+    let rows = tokio_stream::iter(
+        DataSerializer::new(dataset, Json).map(|item| item.map(bytes::Bytes::from)),
+    );
+    HttpResponse::Ok()
+        .insert_header(ContentType::json())
+        .body(actix_web::body::BodyStream::new(rows))
+}
diff --git a/src/serialize.rs b/src/serialize.rs
new file mode 100644
index 0000000..5de17be
--- /dev/null
+++ b/src/serialize.rs
@@ -0,0 +1,127 @@
+use std::{fmt::Write, marker::PhantomData, sync::Arc};
+
+use crate::extract::{DataPoint, DataSet};
+
+const DATE_DISPLAY_FORMAT: &[time::format_description::FormatItem<'static>] =
+    time::macros::format_description!("[year]-[month]-[day]");
+
+type SerializationChunk = Result<String, std::fmt::Error>;
+
+pub trait DataFormat {
+    fn header(dataset: &DataSet) -> SerializationChunk;
+    fn row(dataset: &DataSet, row: &DataPoint) -> SerializationChunk;
+    const ROW_SEPARATOR: &'static str;
+    const END: &'static str;
+}
+
+pub struct DataSerializer<F: DataFormat> {
+    dataset: Arc<DataSet>,
+    index: Option<usize>,
+    serializer: PhantomData<F>,
+}
+
+impl<F: DataFormat> DataSerializer<F> {
+    pub fn new(dataset: Arc<DataSet>, _: F) -> Self {
+        Self {
+            dataset,
+            index: None,
+            serializer: PhantomData,
+        }
+    }
+}
+
+impl<F: DataFormat> Iterator for DataSerializer<F> {
+    type Item = SerializationChunk;
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.index {
+            None => {
+                self.index = Some(0);
+                let header = F::header(&self.dataset);
+                if self.dataset.rows.is_empty() {
+                    Some(header.map(|s| s + F::END))
+                } else {
+                    Some(header)
+                }
+            }
+            Some(i) => {
+                if let Some(row) = self.dataset.rows.get(i) {
+                    self.index = Some(i + 1);
+                    let serialized_row = F::row(&self.dataset, row);
+                    let suffix = if i == self.dataset.rows.len() - 1 {
+                        F::END
+                    } else {
+                        F::ROW_SEPARATOR
+                    };
+                    Some(serialized_row.map(|s| s + suffix))
+                } else {
+                    None
+                }
+            }
+        }
+    }
+}
+
+pub struct Csv;
+impl DataFormat for Csv {
+    fn header(dataset: &DataSet) -> SerializationChunk {
+        let mut header = String::from("Date");
+        for column in dataset.columns.iter() {
+            write!(&mut header, ",{}", column)?;
+        }
+        writeln!(&mut header)?;
+        Ok(header)
+    }
+
+    fn row(dataset: &DataSet, datapoint: &DataPoint) -> SerializationChunk {
+        let mut csv_row = datapoint
+            .date
+            .format(DATE_DISPLAY_FORMAT)
+            .expect("Failed to format date!");
+        for column in dataset.columns.iter() {
+            if let Some(val) = datapoint.values.get(column) {
+                write!(&mut csv_row, ",{}", val)?;
+            } else {
+                write!(&mut csv_row, ",")?;
+            }
+        }
+        writeln!(&mut csv_row)?;
+        Ok(csv_row)
+    }
+
+    const ROW_SEPARATOR: &'static str = "";
+    const END: &'static str = "";
+}
+
+pub struct Json;
+impl DataFormat for Json {
+    fn header(dataset: &DataSet) -> SerializationChunk {
+        let mut header = String::from(r#"{"columns":["Date""#);
+        for column in dataset.columns.iter() {
+            write!(&mut header, ",{}", json::stringify(column.as_str()))?;
+        }
+        write!(&mut header, r#"],"rows":["#)?;
+        Ok(header)
+    }
+
+    fn row(dataset: &DataSet, datapoint: &DataPoint) -> SerializationChunk {
+        let mut row = String::from(r#"{"Date":"#);
+        write!(
+            &mut row,
+            r#""{}""#,
+            datapoint
+                .date
+                .format(DATE_DISPLAY_FORMAT)
+                .expect("Failed to format date!")
+        )?;
+        for column in dataset.columns.iter() {
+            if let Some(val) = datapoint.values.get(column) {
+                write!(&mut row, ",{}:{}", json::stringify(column.as_str()), val)?;
+            }
+        }
+        row += "}";
+        Ok(row)
+    }
+
+    const ROW_SEPARATOR: &'static str = ",";
+    const END: &'static str = "]}";
+}
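
Usage note (outside the patch proper): DataSerializer keeps both endpoints
streaming; each header/row chunk is mapped to Bytes and fed through actix's
BodyStream, so a response is never buffered whole. Because the chunks are
ordinary Result<String, std::fmt::Error> items, the iterator can also be
exercised directly. A minimal sketch, assuming hypothetical columns
"north"/"south" with f64 values; AppState here names the handlers'
shared-state struct, whose definition sits outside this diff:

    // Concatenating every chunk reproduces the full document;
    // collect() stops at the first Err, just as the HTTP stream would.
    let whole_csv: String = DataSerializer::new(dataset.clone(), Csv)
        .collect::<Result<String, std::fmt::Error>>()
        .expect("serialization failed");

For two rows dated 2022-04-05 and 2022-04-06, with the second row missing
its "south" value, the Csv chunks concatenate to

    Date,north,south
    2022-04-05,411.5,254.5
    2022-04-06,387.25,

while the Json chunks form a single valid document, a missing value being
omitted from the row object rather than written as an empty cell:

    {"columns":["Date","north","south"],"rows":[{"Date":"2022-04-05","north":411.5,"south":254.5},{"Date":"2022-04-06","north":387.25}]}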