refactor serialization, add json

This commit is contained in:
xenofem 2022-04-07 01:09:45 -04:00
parent 921b62ed97
commit aa508a43cb
5 changed files with 155 additions and 69 deletions

7
Cargo.lock generated
View file

@ -995,6 +995,12 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "json"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "078e285eafdfb6c4b434e0d31e8cfcb5115b651496faca5749b88fafd4f23bfd"
[[package]]
name = "language-tags"
version = "0.3.2"
@ -1381,6 +1387,7 @@ dependencies = [
"actix-web",
"bytes",
"futures",
"json",
"lazy_static",
"pdf",
"regex",

View file

@ -11,6 +11,7 @@ actix-files = "0.6.0"
actix-web = "4.0.1"
bytes = "1.1"
futures = "0.3"
json = "0.12.4"
lazy_static = "1.4"
pdf = "0.7.2"
regex = "1.5.5"

View file

@ -1,4 +1,4 @@
use std::{collections::HashMap, fmt::Write, sync::Arc};
use std::{collections::HashMap, sync::Arc};
use lazy_static::lazy_static;
use pdf::{backend::Backend, content::Operation, primitive::Primitive};
@ -15,8 +15,6 @@ lazy_static! {
/// `time` format description for dates written as month/day/year with no
/// padding on the month or day (for example `4/7/2022`).
const DATE_PARSE_FORMAT: &[time::format_description::FormatItem] =
time::macros::format_description!("[month padding:none]/[day padding:none]/[year]");
/// `time` format description for dates written as year-month-day
/// (for example `2022-04-07`).
const DATE_DISPLAY_FORMAT: &[time::format_description::FormatItem] =
time::macros::format_description!("[year]-[month]-[day]");
pub struct DataSet {
pub columns: Vec<Arc<String>>,
@ -88,34 +86,6 @@ impl DataSet {
rows,
})
}
/// Builds the CSV header line: the literal `Date` followed by every column
/// name, each preceded by a comma. No trailing newline is added.
///
/// # Errors
///
/// Propagates any [`std::fmt::Error`] raised while appending a column name.
pub fn csv_header(&self) -> Result<String, std::fmt::Error> {
    let mut line = String::from("Date");
    self.columns
        .iter()
        .try_for_each(|name| write!(&mut line, ",{}", name))?;
    Ok(line)
}
/// Builds one CSV line for `datapoint`: its date in `DATE_DISPLAY_FORMAT`,
/// then one comma-prefixed cell per column in `self.columns`. A column with
/// no value in the datapoint yields an empty cell. No trailing newline is
/// added.
///
/// # Errors
///
/// Propagates any [`std::fmt::Error`] raised while appending a value.
///
/// # Panics
///
/// Panics if the date cannot be formatted.
pub fn csv_row(&self, datapoint: &DataPoint) -> Result<String, std::fmt::Error> {
    let mut line = datapoint
        .date
        .format(DATE_DISPLAY_FORMAT)
        .expect("Failed to format date!");
    for name in &self.columns {
        // Every column contributes a separator, even when its cell is empty.
        line.push(',');
        if let Some(value) = datapoint.values.get(name) {
            write!(&mut line, "{}", value)?;
        }
    }
    Ok(line)
}
/// Returns a lazy iterator over the CSV lines of this dataset: the header
/// first, then one line per entry of `self.rows`, in order.
pub fn csv_rows(&self) -> impl Iterator<Item = Result<String, std::fmt::Error>> + '_ {
    let header = std::iter::once_with(move || self.csv_header());
    let body = self.rows.iter().map(move |point| self.csv_row(point));
    header.chain(body)
}
}
pub struct DataPoint {

View file

@ -1,14 +1,16 @@
use std::{sync::Arc, time::Duration};
use actix_web::{get, web, App, HttpResponse, HttpServer, Responder};
use actix_web::{get, http::header::ContentType, web, App, HttpResponse, HttpServer, Responder};
use lazy_static::lazy_static;
use tokio::sync::RwLock;
mod extract;
mod fetch;
mod serialize;
use extract::DataSet;
use fetch::PdfFetcher;
use serialize::{Csv, DataSerializer, Json};
lazy_static! {
static ref UPDATE_INTERVAL: Duration = Duration::from_secs(3600);
@ -64,6 +66,7 @@ async fn main() -> std::io::Result<()> {
App::new()
.app_data(state.clone())
.service(csv)
.service(json)
.service(actix_files::Files::new("/", "./static/").index_file("index.html"))
})
.bind("127.0.0.1:8080")?
@ -71,48 +74,26 @@ async fn main() -> std::io::Result<()> {
.await
}
/// Iterator state for emitting a dataset as newline-terminated CSV lines.
struct DataIterator {
    /// Shared handle to the dataset being emitted.
    dataset: Arc<DataSet>,
    /// `None` until the header has been produced; afterwards `Some(i)`, where
    /// `i` is the index of the next row to emit.
    index: Option<usize>,
}

impl DataIterator {
    /// Creates an iterator positioned before the header line.
    fn new(dataset: Arc<DataSet>) -> Self {
        let index = None;
        DataIterator { dataset, index }
    }
}
impl Iterator for DataIterator {
    type Item = Result<String, std::fmt::Error>;

    /// Yields the CSV header on the first call, then one row per call; every
    /// yielded line has `"\n"` appended. Returns `None` once all rows have
    /// been emitted.
    fn next(&mut self) -> Option<Self::Item> {
        let position = match self.index {
            Some(position) => position,
            None => {
                // First call: emit the header and move on to row 0.
                self.index = Some(0);
                return Some(self.dataset.csv_header().map(|line| line + "\n"));
            }
        };

        let row = self.dataset.rows.get(position)?;
        self.index = Some(position + 1);
        Some(self.dataset.csv_row(row).map(|line| line + "\n"))
    }
}
// Handler for `GET /data.csv`: takes a snapshot of the shared dataset and
// streams it back with content type `text/csv; charset=utf-8`.
#[get("/data.csv")]
async fn csv(data: web::Data<AppState>) -> impl Responder {
// Clone the shared handle out of the lock so the guard is not kept while the
// response body is being streamed.
let dataset = { data.dataset.read().await.clone() };
// NOTE(review): `rows` is bound twice below and `dataset` is moved into both
// constructors. Presumably the `DataIterator` binding is the superseded
// version and only the `DataSerializer` one should remain; confirm.
let rows =
tokio_stream::iter(DataIterator::new(dataset).map(|item| item.map(bytes::Bytes::from)));
// Each serialized chunk is converted to `Bytes` so it can feed a `BodyStream`.
let rows = tokio_stream::iter(
DataSerializer::new(dataset, Csv).map(|item| item.map(bytes::Bytes::from)),
);
HttpResponse::Ok()
.content_type("text/csv; charset=utf-8")
.body(actix_web::body::BodyStream::new(rows))
}
// Handler for `GET /data.json`: takes a snapshot of the shared dataset and
// streams it back as a JSON document with a JSON content-type header.
#[get("/data.json")]
async fn json(data: web::Data<AppState>) -> impl Responder {
    // Clone the shared handle out of the lock so the guard is not kept while
    // the response body is being streamed.
    let snapshot = { data.dataset.read().await.clone() };

    // Serialize lazily, converting every chunk to `Bytes` for the body stream.
    let chunks = DataSerializer::new(snapshot, Json).map(|chunk| chunk.map(bytes::Bytes::from));
    let body = actix_web::body::BodyStream::new(tokio_stream::iter(chunks));

    HttpResponse::Ok()
        .insert_header(ContentType::json())
        .body(body)
}

127
src/serialize.rs Normal file
View file

@ -0,0 +1,127 @@
use std::{fmt::Write, marker::PhantomData, sync::Arc};
use crate::extract::{DataPoint, DataSet};
/// `time` format description used when writing dates: year-month-day
/// (for example `2022-04-07`).
const DATE_DISPLAY_FORMAT: &[time::format_description::FormatItem] =
time::macros::format_description!("[year]-[month]-[day]");
/// One piece of serialized output, or the formatting error raised while
/// building it.
type SerializationChunk = Result<String, std::fmt::Error>;
/// A textual output format for a [`DataSet`].
///
/// `DataSerializer` emits `header` once, then `row` for every datapoint, and
/// glues the pieces together with [`ROW_SEPARATOR`](Self::ROW_SEPARATOR) and
/// [`END`](Self::END).
pub trait DataFormat {
    /// Text appended after every row except the last one.
    const ROW_SEPARATOR: &'static str;

    /// Text appended after the last row, or directly after the header when
    /// the dataset has no rows.
    const END: &'static str;

    /// Produces the chunk that opens the output for `dataset`.
    fn header(dataset: &DataSet) -> SerializationChunk;

    /// Produces the chunk for a single `row` of `dataset`.
    fn row(dataset: &DataSet, row: &DataPoint) -> SerializationChunk;
}
/// Iterator that serializes a shared [`DataSet`] chunk by chunk in the format
/// selected by the type parameter `F`.
pub struct DataSerializer<F>
where
    F: DataFormat,
{
    /// Shared handle to the dataset being serialized.
    dataset: Arc<DataSet>,
    /// `None` until the header has been produced; afterwards `Some(i)`, where
    /// `i` is the index of the next row to emit.
    index: Option<usize>,
    /// Zero-sized marker tying the serializer to its format; no `F` value is
    /// stored.
    serializer: PhantomData<F>,
}
impl<F: DataFormat> DataSerializer<F> {
pub fn new(dataset: Arc<DataSet>, _: F) -> Self {
Self {
dataset,
index: None,
serializer: PhantomData,
}
}
}
impl<F> Iterator for DataSerializer<F>
where
    F: DataFormat,
{
    type Item = SerializationChunk;

    /// Yields the header first, then one chunk per row.
    ///
    /// Every row chunk is followed by `F::ROW_SEPARATOR`, except the last one,
    /// which is followed by `F::END`. When the dataset has no rows, `F::END`
    /// is appended to the header instead. Returns `None` once all rows have
    /// been emitted.
    fn next(&mut self) -> Option<Self::Item> {
        let position = match self.index {
            Some(position) => position,
            None => {
                // First call: emit the header and move on to row 0.
                self.index = Some(0);
                let header = F::header(&self.dataset);
                return Some(if self.dataset.rows.is_empty() {
                    // Nothing will follow, so the header must close the output.
                    header.map(|text| text + F::END)
                } else {
                    header
                });
            }
        };

        let row = self.dataset.rows.get(position)?;
        self.index = Some(position + 1);

        let chunk = F::row(&self.dataset, row);
        let is_last = position + 1 == self.dataset.rows.len();
        let suffix = if is_last { F::END } else { F::ROW_SEPARATOR };
        Some(chunk.map(|text| text + suffix))
    }
}
/// CSV output format: a header line followed by one line per datapoint, each
/// terminated by `\n`.
pub struct Csv;

/// Appends `field` to `out` as a single CSV field.
///
/// Fields containing a comma, a double quote or a line break are wrapped in
/// double quotes with embedded quotes doubled (RFC 4180); all other fields are
/// appended unchanged.
fn push_csv_field(out: &mut String, field: &str) {
    let needs_quoting = field.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r'));
    if !needs_quoting {
        out.push_str(field);
        return;
    }
    out.push('"');
    for ch in field.chars() {
        if ch == '"' {
            // A literal quote inside a quoted field is written twice.
            out.push('"');
        }
        out.push(ch);
    }
    out.push('"');
}

impl DataFormat for Csv {
    /// Builds the header line: `Date` followed by every column name.
    ///
    /// Column names are quoted when necessary so that a name containing a
    /// comma, quote or line break cannot shift or split the columns.
    fn header(dataset: &DataSet) -> SerializationChunk {
        let mut header = String::from("Date");
        for column in dataset.columns.iter() {
            header.push(',');
            push_csv_field(&mut header, column.as_str());
        }
        header.push('\n');
        Ok(header)
    }

    /// Builds one line for `datapoint`: its date in `DATE_DISPLAY_FORMAT`,
    /// then one cell per column of `dataset`. A column with no value in the
    /// datapoint yields an empty cell.
    ///
    /// NOTE(review): values are written through `Display` without quoting;
    /// this assumes their textual form never contains CSV metacharacters
    /// (e.g. they are numeric) — confirm against `DataPoint`.
    ///
    /// # Panics
    ///
    /// Panics if the date cannot be formatted.
    fn row(dataset: &DataSet, datapoint: &DataPoint) -> SerializationChunk {
        let mut csv_row = datapoint
            .date
            .format(DATE_DISPLAY_FORMAT)
            .expect("Failed to format date!");
        for column in dataset.columns.iter() {
            match datapoint.values.get(column) {
                Some(val) => write!(&mut csv_row, ",{}", val)?,
                None => csv_row.push(','),
            }
        }
        csv_row.push('\n');
        Ok(csv_row)
    }

    // Each line already ends with its own newline, so nothing is inserted
    // between rows or after the last one.
    const ROW_SEPARATOR: &'static str = "";
    const END: &'static str = "";
}
/// JSON output format: a single object with a `columns` array and a `rows`
/// array holding one object per datapoint.
pub struct Json;

impl DataFormat for Json {
    // Rows are elements of a JSON array: comma between them, and the array
    // plus the enclosing object are closed after the last one.
    const ROW_SEPARATOR: &'static str = ",";
    const END: &'static str = "]}";

    /// Opens the document: writes the `columns` array (`"Date"` first, then
    /// every column name as a JSON string) and opens the `rows` array.
    fn header(dataset: &DataSet) -> SerializationChunk {
        let mut out = String::from(r#"{"columns":["Date""#);
        for name in dataset.columns.iter() {
            out.push(',');
            out.push_str(&json::stringify(name.as_str()));
        }
        out.push_str(r#"],"rows":["#);
        Ok(out)
    }

    /// Builds the JSON object for `datapoint`: a `Date` member followed by one
    /// member per column that has a value; columns without a value are
    /// omitted.
    ///
    /// NOTE(review): values are written through `Display` as bare JSON
    /// tokens; this assumes they render as valid JSON numbers — confirm
    /// against `DataPoint`.
    ///
    /// # Panics
    ///
    /// Panics if the date cannot be formatted.
    fn row(dataset: &DataSet, datapoint: &DataPoint) -> SerializationChunk {
        let date = datapoint
            .date
            .format(DATE_DISPLAY_FORMAT)
            .expect("Failed to format date!");
        let mut out = format!(r#"{{"Date":"{}""#, date);

        let present = dataset
            .columns
            .iter()
            .filter_map(|name| datapoint.values.get(name).map(|val| (name, val)));
        for (name, val) in present {
            write!(&mut out, ",{}:{}", json::stringify(name.as_str()), val)?;
        }

        out.push('}');
        Ok(out)
    }
}