"""embedded database""" library

This commit is contained in:
xenofem 2022-08-15 15:42:47 -04:00
commit efd3124b29
3 changed files with 391 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/target
/Cargo.lock

17
Cargo.toml Normal file
View file

@ -0,0 +1,17 @@
[package]
name = "jsondb"
version = "0.1.0"
edition = "2021"
authors = ["xenofem <xenofem@xeno.science>"]
[dependencies]
async-trait = "0.1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
thiserror = "1"
tokio = { version = "1", features = ["full"] }
[dev-dependencies]
tempfile = "3.3"
time = { version = "0.3", features = ["macros"] }
serde_with = { version = "2", features = ["time_0_3"] }

372
src/lib.rs Normal file
View file

@ -0,0 +1,372 @@
//! A quick-and-dirty &ldquo;&ldquo;&ldquo;embedded database&rdquo;&rdquo;&rdquo; library.
//!
//! This is just a library for storing data in JSON files, but with
//! a few added conveniences:
//!
//! * The saved data includes a schema version number, and will be
//! automatically migrated to newer schema versions.
//! * Data is saved to the backing JSON file, in a hopefully-atomic
//! fashion, every time it's modified.
//! * All I/O operations are async using [tokio].
//!
//! Data can be represented in pretty much any format you can convince
//! [serde] to go along with, except for two restrictions:
//!
//! * Your serialization format shouldn't include a top-level
//! `version` field of its own, as this is reserved for our schema
//! version tracking.
//! * You can't use `#[serde(deny_unknown_fields)]`, as this conflicts
//! with our use of `#[serde(flatten)]`.
use std::{cmp::Ordering, ffi::OsString, future::Future, io::ErrorKind, path::PathBuf};
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use tokio::{
fs::{rename, File},
io::{AsyncReadExt, AsyncWriteExt},
};
/// A JSON-backed &ldquo;&ldquo;&ldquo;database&rdquo;&rdquo;&rdquo;.
///
/// This wraps a value that is loaded from a JSON file, automatically
/// migrated forward from previous schema versions, and automatically
/// written to disk when it's updated (we attempt to make saves atomic
/// using the `rename(2)` function).
///
/// `Debug` is derived, so a `JsonDb<T>` can be debug-printed whenever
/// the wrapped schema type `T` is itself `Debug`.
#[derive(Debug)]
pub struct JsonDb<T: Schema> {
    /// Location of the backing JSON file.
    path: PathBuf,
    /// The current in-memory value of the database.
    data: T,
}
/// Schema for a JSON-backed database.
///
/// This needs to be a (de)serializable type, with a previous schema
/// that can be migrated into the new schema, and a version number
/// which must be the previous schema's version +1, [unless this is
/// version 0][`SchemaV0`]. This can then be automatically parsed
/// from a JSON object containing a `version` field along with the
/// other fields of the corresponding schema version; earlier versions
/// will be migrated to the current version automatically.
pub trait Schema: DeserializeOwned + Serialize {
const VERSION: u32;
type Prev: Schema + Into<Self>;
fn parse(s: &str) -> Result<Self, Error> {
let Version { version } = serde_json::from_str(s)?;
match version.cmp(&Self::VERSION) {
Ordering::Less => Ok(Self::Prev::parse(s)?.into()),
Ordering::Equal => Ok(serde_json::from_str(s)?),
Ordering::Greater => Err(Error::UnknownVersion(version)),
}
}
}
/// Marker trait to indicate version 0 of a database schema.
///
/// Any type implementing this gets a [`Schema`] implementation for
/// free, with version number `0` and `Self` as the previous version.
pub trait SchemaV0: DeserializeOwned + Serialize {}

// Blanket implementation: every version-0 marker type is a `Schema`.
// `Prev` has to name some schema, so it points back at `Self`; the
// reflexive `Into<Self>` conversion satisfies the trait bound.
impl<T> Schema for T
where
    T: SchemaV0,
{
    type Prev = Self;
    const VERSION: u32 = 0;
}
/// Helper used to read only the `version` field of a JSON document.
#[derive(Deserialize)]
struct Version {
    /// Schema version number stored in the document.
    version: u32,
}
/// Serialization wrapper that pairs borrowed schema data with a
/// version number.
#[derive(Serialize)]
struct Repr<'a, T>
where
    T: Schema,
{
    /// Serialized as the `version` field.
    version: u32,
    /// The schema's own fields, flattened into the same JSON object as
    /// `version`.
    #[serde(flatten)]
    data: &'a T,
}
/// Errors that can occur while working with [`JsonDb`].
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("I/O error")]
Io(#[from] std::io::Error),
#[error("Failed to parse JSON")]
Json(#[from] serde_json::Error),
#[error("Unknown schema version {0}")]
UnknownVersion(u32),
}
impl<T> JsonDb<T>
where
    T: Schema + Default,
{
    /// Open the [`JsonDb`] stored at `path`. If the file does not
    /// exist, it is created and initialized with `T::default()`.
    pub async fn load(path: PathBuf) -> Result<Self, Error> {
        // The initializer is only invoked when the file is missing.
        let init = || std::future::ready(Ok(T::default()));
        Self::load_or_else(path, init).await
    }
}
impl<T: Schema> JsonDb<T> {
    /// Load a [`JsonDb`] from a given file, creating it and
    /// initializing it with the provided default value if it does not
    /// exist.
    ///
    /// # Errors
    ///
    /// See [`JsonDb::load_or_else`].
    pub async fn load_or(path: PathBuf, default: T) -> Result<Self, Error> {
        Self::load_or_else(path, || std::future::ready(Ok(default))).await
    }

    /// Load a [`JsonDb`] from a given file, creating it and
    /// initializing it with the provided function if it does not
    /// exist.
    ///
    /// The loaded (or freshly created) value is always written back to
    /// disk before returning, so that any schema migration performed
    /// while parsing is persisted.
    ///
    /// # Errors
    ///
    /// * [`Error::Io`] if the file cannot be opened (for any reason
    ///   other than not existing), read, or written, if `default`
    ///   itself fails, or if `path` has no final file name component.
    /// * [`Error::Json`] or [`Error::UnknownVersion`] if the existing
    ///   file contents cannot be parsed as schema `T`.
    pub async fn load_or_else<F, Fut>(path: PathBuf, default: F) -> Result<Self, Error>
    where
        F: FnOnce() -> Fut,
        Fut: Future<Output = std::io::Result<T>>,
    {
        let data = match File::open(&path).await {
            Ok(mut f) => {
                let mut buf = String::new();
                f.read_to_string(&mut buf).await?;
                T::parse(&buf)?
            }
            // Only a missing file falls back to the default value; any
            // other failure to open is reported to the caller.
            Err(e) if e.kind() == ErrorKind::NotFound => default().await?,
            Err(e) => return Err(e.into()),
        };
        let mut db = JsonDb { path, data };
        // Always save in case we've run migrations
        db.save().await?;
        Ok(db)
    }

    /// Serialize the current data (together with the schema version)
    /// to a hidden temporary file next to the target, sync it, and
    /// then rename it over the real file.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Io`] with [`ErrorKind::InvalidInput`] if the
    /// database path has no final file name component (for example an
    /// empty path, a filesystem root, or a path ending in `..`), rather
    /// than panicking. Other I/O and serialization failures are
    /// propagated as [`Error::Io`] / [`Error::Json`].
    async fn save(&mut self) -> Result<(), Error> {
        let file_name = self.path.file_name().ok_or_else(|| {
            std::io::Error::new(
                ErrorKind::InvalidInput,
                "database path has no file name component",
            )
        })?;
        // Temp file is `.<name>.tmp` in the same directory, so the
        // final rename stays within one directory.
        let mut temp_file_name = OsString::from(".");
        temp_file_name.push(file_name);
        temp_file_name.push(".tmp");
        let temp_file_path = self.path.with_file_name(temp_file_name);
        {
            let mut temp_file = File::create(&temp_file_path).await?;
            temp_file
                .write_all(&serde_json::to_vec_pretty(&Repr {
                    version: T::VERSION,
                    data: &self.data,
                })?)
                .await?;
            temp_file.sync_all().await?;
        }
        // Atomically update the actual file
        rename(&temp_file_path, &self.path).await?;
        Ok(())
    }

    /// Borrow an immutable reference to the wrapped data
    pub fn read(&self) -> &T {
        &self.data
    }

    /// Modify the wrapped data in-place, writing it back to disk
    /// afterwards, and return whatever the updater returned.
    ///
    /// # Errors
    ///
    /// Returns an error if saving fails. The updater has already run
    /// at that point, so the in-memory data keeps its modifications
    /// even though they were not persisted.
    pub async fn write<U, V>(&mut self, updater: U) -> Result<V, Error>
    where
        U: FnOnce(&mut T) -> V,
    {
        let result = updater(&mut self.data);
        self.save().await?;
        Ok(result)
    }

    /// Modify the wrapped data in-place using asynchronous code,
    /// writing it back to disk afterwards, and return the output of
    /// the updater's future.
    ///
    /// # Errors
    ///
    /// Returns an error if saving fails. The updater has already run
    /// at that point, so the in-memory data keeps its modifications
    /// even though they were not persisted.
    pub async fn write_async<U, V, Fut>(&mut self, updater: U) -> Result<V, Error>
    where
        U: FnOnce(&mut T) -> Fut,
        Fut: Future<Output = V>,
    {
        let result = updater(&mut self.data).await;
        self.save().await?;
        Ok(result)
    }
}
#[cfg(test)]
mod tests {
    use std::fs::File;

    use serde::{Deserialize, Serialize};
    use serde_json::Value;
    use serde_with::serde_as;
    use tempfile::tempdir;
    use time::OffsetDateTime;

    use super::{Error, JsonDb, Schema, SchemaV0};

    /// Initial schema: just a name.
    #[derive(Default, Debug, PartialEq, Eq, Deserialize, Serialize)]
    struct V0 {
        name: String,
    }

    impl SchemaV0 for V0 {}

    /// Second schema: adds an optional gender and an integer
    /// last-updated value.
    #[derive(Default, Debug, PartialEq, Eq, Deserialize, Serialize)]
    struct V1 {
        name: String,
        #[serde(default)]
        gender: Option<String>,
        last_updated: i64,
    }

    impl Schema for V1 {
        const VERSION: u32 = 1;
        type Prev = V0;
    }

    impl From<V0> for V1 {
        fn from(old: V0) -> Self {
            V1 {
                name: old.name,
                gender: None,
                last_updated: 0,
            }
        }
    }

    /// Third schema: `last_updated` becomes an RFC 3339 timestamp.
    #[serde_as]
    #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)]
    struct V2 {
        name: String,
        #[serde(default)]
        gender: Option<String>,
        #[serde_as(as = "time::format_description::well_known::Rfc3339")]
        last_updated: OffsetDateTime,
    }

    impl Default for V2 {
        fn default() -> Self {
            V2 {
                name: String::new(),
                gender: None,
                last_updated: OffsetDateTime::UNIX_EPOCH,
            }
        }
    }

    impl Schema for V2 {
        const VERSION: u32 = 2;
        type Prev = V1;
    }

    impl From<V1> for V2 {
        fn from(old: V1) -> Self {
            V2 {
                name: old.name,
                gender: old.gender,
                // The V1 integer is interpreted as a Unix timestamp.
                last_updated: OffsetDateTime::from_unix_timestamp(old.last_updated).unwrap(),
            }
        }
    }

    const V0DATA: &str = r#"{"version":0,"name":"xenofem"}"#;
    const V1DATA: &str =
        r#"{"version":1,"name":"xenofem","gender":"dress go spinny","last_updated":1660585235}"#;
    const V2DATA: &str = r#"{"version":2,"name":"xenofem","gender":"dress go spinny","last_updated":"2022-08-15T17:47:18Z"}"#;

    #[test]
    fn parse_v0() {
        assert_eq!(
            V0::parse(V0DATA).unwrap(),
            V0 {
                name: String::from("xenofem")
            },
        );
    }

    #[test]
    fn parse_v1() {
        assert_eq!(
            V1::parse(V1DATA).unwrap(),
            V1 {
                name: String::from("xenofem"),
                gender: Some(String::from("dress go spinny")),
                last_updated: 1660585235
            },
        );
    }

    #[test]
    fn migrate_v0_v1() {
        assert_eq!(
            V1::parse(V0DATA).unwrap(),
            V1 {
                name: String::from("xenofem"),
                gender: None,
                last_updated: 0
            },
        );
    }

    #[test]
    fn parse_v2() {
        assert_eq!(
            V2::parse(V2DATA).unwrap(),
            V2 {
                name: String::from("xenofem"),
                gender: Some(String::from("dress go spinny")),
                last_updated: OffsetDateTime::from_unix_timestamp(1660585638).unwrap(),
            },
        );
    }

    #[test]
    fn migrate_v1_v2() {
        assert_eq!(
            V2::parse(V1DATA).unwrap(),
            V2 {
                name: String::from("xenofem"),
                gender: Some(String::from("dress go spinny")),
                last_updated: time::macros::datetime!(2022-08-15 17:40:35 +00:00)
            },
        );
    }

    #[test]
    fn migrate_v0_v2() {
        assert_eq!(
            V2::parse(V0DATA).unwrap(),
            V2 {
                name: String::from("xenofem"),
                gender: None,
                last_updated: time::macros::datetime!(1970-01-01 00:00:00 +00:00)
            },
        );
    }

    /// Data written by a newer schema than the one being parsed must be
    /// rejected rather than silently misread.
    #[test]
    fn reject_newer_version() {
        assert!(matches!(
            V0::parse(V1DATA),
            Err(Error::UnknownVersion(1))
        ));
        assert!(matches!(
            V1::parse(V2DATA),
            Err(Error::UnknownVersion(2))
        ));
    }

    /// End-to-end check: create a V0 database, modify it, reopen it as
    /// V2 (forcing a two-step migration), and confirm after each step
    /// that the file on disk reflects the change.
    #[tokio::test]
    async fn load_write_migrate() {
        let dir = tempdir().unwrap();
        let db_file = dir.path().join("test.json");
        {
            let mut db0: JsonDb<V0> = JsonDb::load(db_file.clone()).await.unwrap();
            let value: Value = serde_json::from_reader(File::open(&db_file).unwrap()).unwrap();
            assert_eq!(value["version"], 0);
            assert_eq!(&value["name"], "");
            db0.write(|val| {
                val.name = String::from("mefonex");
            })
            .await
            .unwrap();
            let value: Value = serde_json::from_reader(File::open(&db_file).unwrap()).unwrap();
            assert_eq!(&value["name"], "mefonex");
        }
        {
            let mut db2: JsonDb<V2> = JsonDb::load(db_file.clone()).await.unwrap();
            let value: Value = serde_json::from_reader(File::open(&db_file).unwrap()).unwrap();
            assert_eq!(value["version"], 2);
            assert_eq!(&value["name"], "mefonex");
            assert_eq!(value["gender"], Value::Null);
            assert_eq!(&value["last_updated"], "1970-01-01T00:00:00Z");
            db2.write(|val| {
                val.last_updated = OffsetDateTime::from_unix_timestamp(1660585638).unwrap();
            })
            .await
            .unwrap();
            let value: Value = serde_json::from_reader(File::open(&db_file).unwrap()).unwrap();
            assert_eq!(&value["last_updated"], "2022-08-15T17:47:18Z");
        }
    }
}