commit efd3124b29dda68715a1f50b1c641ca7fa6ca6c7 Author: xenofem Date: Mon Aug 15 15:42:47 2022 -0400 """embedded database""" library diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4fffb2f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ea1dbe1 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "jsondb" +version = "0.1.0" +edition = "2021" +authors = ["xenofem "] + +[dependencies] +async-trait = "0.1" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +thiserror = "1" +tokio = { version = "1", features = ["full"] } + +[dev-dependencies] +tempfile = "3.3" +time = { version = "0.3", features = ["macros"] } +serde_with = { version = "2", features = ["time_0_3"] } diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..f8dd7ad --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,372 @@ +//! A quick-and-dirty “““embedded database””” library. +//! +//! This is just a library for storing data in JSON files, but with +//! a few added conveniences: +//! +//! * The saved data includes a schema version number, and will be +//! automatically migrated to newer schema versions. +//! * Data is saved to the backing JSON file, in a hopefully-atomic +//! fashion, every time it's modified. +//! * All I/O operations are async using [tokio]. +//! +//! Data can be represented in pretty much any format you can convince +//! [serde] to go along with, except for two restrictions: +//! +//! * Your serialization format shouldn't include a top-level +//! `version` field of its own, as this is reserved for our schema +//! version tracking. +//! * You can't use `#[serde(deny_unknown_fields)]`, as this conflicts +//! with our use of `#[serde(flatten)]`. 
+
+use std::{cmp::Ordering, ffi::OsString, future::Future, io::ErrorKind, path::PathBuf};
+
+use serde::{de::DeserializeOwned, Deserialize, Serialize};
+use tokio::{
+    fs::{rename, File},
+    io::{AsyncReadExt, AsyncWriteExt},
+};
+
+/// A JSON-backed “““database”””.
+///
+/// This wraps a value that is loaded from a JSON file, automatically
+/// migrated forward from previous schema versions, and automatically
+/// written to disk when it's updated (we attempt to make saves atomic
+/// using the `rename(2)` function).
+pub struct JsonDb<T: Schema> {
+    /// Location of the backing JSON file.
+    path: PathBuf,
+    /// The current in-memory value, always at schema version `T::VERSION`.
+    data: T,
+}
+
+/// Schema for a JSON-backed database.
+///
+/// This needs to be a (de)serializable type, with a previous schema
+/// that can be migrated into the new schema, and a version number
+/// which must be the previous schema's version +1, [unless this is
+/// version 0][`SchemaV0`]. This can then be automatically parsed
+/// from a JSON object containing a `version` field along with the
+/// other fields of the corresponding schema version; earlier versions
+/// will be migrated to the current version automatically.
+pub trait Schema: DeserializeOwned + Serialize {
+    /// Version number stored in the `version` field of data using this schema.
+    const VERSION: u32;
+
+    /// The schema this one replaces; `Into<Self>` performs the migration.
+    type Prev: Schema + Into<Self>;
+
+    /// Parse a JSON document, migrating it from an older schema if needed.
+    ///
+    /// The top-level `version` field is read first and compared with
+    /// [`Self::VERSION`]:
+    ///
+    /// * lower: the document is handed to [`Self::Prev`] and the result
+    ///   is converted with `Into<Self>`;
+    /// * equal: the document is deserialized directly as `Self`;
+    /// * higher: parsing is refused.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::Json`] if the document is not valid JSON, has no
+    /// usable `version` field, or does not match the selected schema, and
+    /// [`Error::UnknownVersion`] if the stored version is newer than
+    /// [`Self::VERSION`].
+    fn parse(s: &str) -> Result<Self, Error> {
+        // First pass: look only at the version number.
+        let Version { version } = serde_json::from_str(s)?;
+        match version.cmp(&Self::VERSION) {
+            // Older data: let the previous schema parse it, then migrate.
+            Ordering::Less => Ok(Self::Prev::parse(s)?.into()),
+            Ordering::Equal => Ok(serde_json::from_str(s)?),
+            Ordering::Greater => Err(Error::UnknownVersion(version)),
+        }
+    }
+}
+
+/// Marker trait to indicate version 0 of a database schema.
+///
+/// Implementing this will automatically implement [`Schema`], with
+/// version number `0` and `Self` as the previous version.
+pub trait SchemaV0: DeserializeOwned + Serialize {}
+
+// Every version-0 schema is a `Schema` whose "previous" version is itself.
+// `Schema::parse` only delegates to `Prev` for a stored version strictly
+// below `VERSION`, which cannot happen for an unsigned version of 0.
+impl<T: SchemaV0> Schema for T {
+    const VERSION: u32 = 0;
+    type Prev = Self;
+}
+
+/// Just the `version` field of a stored document; all other fields are ignored.
+#[derive(Deserialize)]
+struct Version {
+    version: u32,
+}
+
+/// On-disk representation: the schema's own fields flattened next to a
+/// top-level `version` field.
+#[derive(Serialize)]
+struct Repr<'a, T: Schema> {
+    version: u32,
+    #[serde(flatten)]
+    data: &'a T,
+}
+
+/// Errors that can occur while working with [`JsonDb`].
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    /// Reading, writing or renaming the backing file failed.
+    #[error("I/O error")]
+    Io(#[from] std::io::Error),
+    /// The stored data could not be parsed, or the value could not be serialized.
+    #[error("Failed to parse JSON")]
+    Json(#[from] serde_json::Error),
+    /// The stored `version` is newer than the schema being loaded.
+    #[error("Unknown schema version {0}")]
+    UnknownVersion(u32),
+}
+
+impl<T: Schema + Default> JsonDb<T> {
+    /// Load a [`JsonDb`] from a given file, creating it and
+    /// initializing it with the schema's default value if it does not
+    /// exist.
+    ///
+    /// # Errors
+    ///
+    /// See [`JsonDb::load_or_else`].
+    pub async fn load(path: PathBuf) -> Result<Self, Error> {
+        Self::load_or_else(path, || std::future::ready(Ok(T::default()))).await
+    }
+}
+
+impl<T: Schema> JsonDb<T> {
+    /// Load a [`JsonDb`] from a given file, creating it and
+    /// initializing it with the provided default value if it does not
+    /// exist.
+    ///
+    /// # Errors
+    ///
+    /// See [`JsonDb::load_or_else`].
+    pub async fn load_or(path: PathBuf, default: T) -> Result<Self, Error> {
+        Self::load_or_else(path, || std::future::ready(Ok(default))).await
+    }
+
+    /// Load a [`JsonDb`] from a given file, creating it and
+    /// initializing it with the provided function if it does not
+    /// exist.
+    ///
+    /// `default` is only called when opening the file fails with
+    /// [`ErrorKind::NotFound`]. The loaded (or freshly created) value is
+    /// always written back to `path` before this function returns.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be opened or read for any reason
+    /// other than not existing, if its contents cannot be parsed, if
+    /// `default` fails, or if the write-back fails.
+    pub async fn load_or_else<F, Fut>(path: PathBuf, default: F) -> Result<Self, Error>
+    where
+        F: FnOnce() -> Fut,
+        // NOTE(review): the generic argument of `Future` was lost when this
+        // patch was extracted; `Result<T, Error>` is assumed here - confirm
+        // against the real source.
+        Fut: Future<Output = Result<T, Error>>,
+    {
+        let data = match File::open(&path).await {
+            Ok(mut f) => {
+                let mut buf = String::new();
+                f.read_to_string(&mut buf).await?;
+                T::parse(&buf)?
+            }
+            // A missing file is not an error: start from the caller's default.
+            Err(e) if e.kind() == ErrorKind::NotFound => default().await?,
+            Err(e) => return Err(e.into()),
+        };
+        let mut db = JsonDb { path, data };
+        // Always save in case we've run migrations
+        db.save().await?;
+        Ok(db)
+    }
+
+    /// Serialize the current value and replace the backing file with it.
+    ///
+    /// The JSON is first written to a hidden sibling file named
+    /// `.<file name>.tmp`, synced, and then renamed over the real path.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::Io`] if the database path has no final file-name
+    /// component or if any filesystem operation fails, and [`Error::Json`]
+    /// if the value cannot be serialized.
+    async fn save(&mut self) -> Result<(), Error> {
+        // A path such as `/` or `foo/..` cannot name a database file; report
+        // that to the caller instead of panicking.
+        let file_name = self.path.file_name().ok_or_else(|| {
+            std::io::Error::new(ErrorKind::InvalidInput, "database path has no file name")
+        })?;
+        let mut temp_file_name = OsString::from(".");
+        temp_file_name.push(file_name);
+        temp_file_name.push(".tmp");
+        let temp_file_path = self.path.with_file_name(temp_file_name);
+
+        // Serialize before touching the filesystem so that a serialization
+        // failure does not leave an empty temporary file behind.
+        let bytes = serde_json::to_vec_pretty(&Repr {
+            version: T::VERSION,
+            data: &self.data,
+        })?;
+        {
+            let mut temp_file = File::create(&temp_file_path).await?;
+            temp_file.write_all(&bytes).await?;
+            temp_file.sync_all().await?;
+        }
+        // Atomically update the actual file
+        rename(&temp_file_path, &self.path).await?;
+
+        Ok(())
+    }
+
+    /// Borrow an immutable reference to the wrapped data
+    pub fn read(&self) -> &T {
+        &self.data
+    }
+
+    /// Modify the wrapped data in-place, atomically writing it back
+    /// to disk afterwards.
+    ///
+    /// Returns whatever `updater` returns.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if saving fails. `updater` has already run by then,
+    /// so the in-memory value keeps its modifications even though the file
+    /// was not updated.
+    pub async fn write<U, V>(&mut self, updater: U) -> Result<V, Error>
+    where
+        U: FnOnce(&mut T) -> V,
+    {
+        let result = updater(&mut self.data);
+        self.save().await?;
+        Ok(result)
+    }
+
+    /// Modify the wrapped data in-place using asynchronous code,
+    /// atomically writing it back to disk afterwards.
+    pub async fn write_async<U, V, Fut>(&mut self, updater: U) -> Result<V, Error>
+    where
+        U: FnOnce(&mut T) -> Fut,
+        Fut: Future<Output = V>,
+    {
+        // Run the update to completion before persisting the new state.
+        let result = updater(&mut self.data).await;
+        self.save().await?;
+        Ok(result)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs::File;
+
+    use serde::{Deserialize, Serialize};
+    use serde_json::Value;
+    use serde_with::serde_as;
+    use tempfile::tempdir;
+    use time::OffsetDateTime;
+
+    use super::{Error, JsonDb, Schema, SchemaV0};
+
+    // Three generations of the same record, used to exercise migrations.
+
+    #[derive(Default, Debug, PartialEq, Eq, Deserialize, Serialize)]
+    struct V0 {
+        name: String,
+    }
+    impl SchemaV0 for V0 {}
+
+    #[derive(Default, Debug, PartialEq, Eq, Deserialize, Serialize)]
+    struct V1 {
+        name: String,
+        #[serde(default)]
+        gender: Option<String>,
+        last_updated: i64,
+    }
+    impl Schema for V1 {
+        const VERSION: u32 = 1;
+        type Prev = V0;
+    }
+    impl From<V0> for V1 {
+        fn from(old: V0) -> Self {
+            V1 {
+                name: old.name,
+                gender: None,
+                last_updated: 0,
+            }
+        }
+    }
+
+    #[serde_as]
+    #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)]
+    struct V2 {
+        name: String,
+        #[serde(default)]
+        gender: Option<String>,
+        #[serde_as(as = "time::format_description::well_known::Rfc3339")]
+        last_updated: OffsetDateTime,
+    }
+    impl Default for V2 {
+        fn default() -> Self {
+            V2 {
+                name: String::new(),
+                gender: None,
+                last_updated: OffsetDateTime::UNIX_EPOCH,
+            }
+        }
+    }
+    impl Schema for V2 {
+        const VERSION: u32 = 2;
+        type Prev = V1;
+    }
+    impl From<V1> for V2 {
+        fn from(old: V1) -> Self {
+            V2 {
+                name: old.name,
+                gender: old.gender,
+                last_updated: OffsetDateTime::from_unix_timestamp(old.last_updated).unwrap(),
+            }
+        }
+    }
+
+    const V0DATA: &str = r#"{"version":0,"name":"xenofem"}"#;
+    const V1DATA: &str =
+        r#"{"version":1,"name":"xenofem","gender":"dress go spinny","last_updated":1660585235}"#;
+    const V2DATA: &str = r#"{"version":2,"name":"xenofem","gender":"dress go spinny","last_updated":"2022-08-15T17:47:18Z"}"#;
+
+    #[test]
+    fn parse_v0() {
+        assert_eq!(
+            V0::parse(V0DATA).unwrap(),
+            V0 {
+                name: String::from("xenofem")
+            },
+        );
+    }
+
+    #[test]
+    fn parse_v1() {
+        assert_eq!(
+            V1::parse(V1DATA).unwrap(),
+            V1 {
+                name: String::from("xenofem"),
+                gender: Some(String::from("dress go spinny")),
+                last_updated: 1660585235
+            },
+        );
+    }
+
+    #[test]
+    fn migrate_v0_v1() {
+        assert_eq!(
+            V1::parse(V0DATA).unwrap(),
+            V1 {
+                name: String::from("xenofem"),
+                gender: None,
+                last_updated: 0
+            },
+        );
+    }
+
+    #[test]
+    fn parse_v2() {
+        assert_eq!(
+            V2::parse(V2DATA).unwrap(),
+            V2 {
+                name: String::from("xenofem"),
+                gender: Some(String::from("dress go spinny")),
+                last_updated: OffsetDateTime::from_unix_timestamp(1660585638).unwrap(),
+            },
+        );
+    }
+
+    #[test]
+    fn migrate_v1_v2() {
+        assert_eq!(
+            V2::parse(V1DATA).unwrap(),
+            V2 {
+                name: String::from("xenofem"),
+                gender: Some(String::from("dress go spinny")),
+                last_updated: time::macros::datetime!(2022-08-15 17:40:35 +00:00)
+            },
+        );
+    }
+
+    #[test]
+    fn migrate_v0_v2() {
+        assert_eq!(
+            V2::parse(V0DATA).unwrap(),
+            V2 {
+                name: String::from("xenofem"),
+                gender: None,
+                last_updated: time::macros::datetime!(1970-01-01 00:00:00 +00:00)
+            },
+        );
+    }
+
+    // Data written by a newer schema must be refused, not silently misread.
+    #[test]
+    fn reject_newer_version() {
+        assert!(matches!(
+            V1::parse(V2DATA),
+            Err(Error::UnknownVersion(2))
+        ));
+    }
+
+    #[tokio::test]
+    async fn load_write_migrate() {
+        let dir = tempdir().unwrap();
+        let db_file = dir.path().join("test.json");
+        {
+            // Loading a missing file creates it with the default value.
+            let mut db0: JsonDb<V0> = JsonDb::load(db_file.clone()).await.unwrap();
+            let value: Value = serde_json::from_reader(File::open(&db_file).unwrap()).unwrap();
+            assert_eq!(value["version"], 0);
+            assert_eq!(&value["name"], "");
+            db0.write(|val| {
+                val.name = String::from("mefonex");
+            })
+            .await
+            .unwrap();
+            let value: Value = serde_json::from_reader(File::open(&db_file).unwrap()).unwrap();
+            assert_eq!(&value["name"], "mefonex");
+        }
+        {
+            // Re-opening with a newer schema migrates and rewrites the file.
+            let mut db2: JsonDb<V2> = JsonDb::load(db_file.clone()).await.unwrap();
+            let value: Value = serde_json::from_reader(File::open(&db_file).unwrap()).unwrap();
+            assert_eq!(value["version"], 2);
+            assert_eq!(&value["name"], "mefonex");
+            assert_eq!(value["gender"], Value::Null);
+            assert_eq!(&value["last_updated"], "1970-01-01T00:00:00Z");
+            db2.write(|val| {
+                val.last_updated = OffsetDateTime::from_unix_timestamp(1660585638).unwrap();
+            })
+            .await
+            .unwrap();
+            let value: Value = serde_json::from_reader(File::open(&db_file).unwrap()).unwrap();
+            assert_eq!(&value["last_updated"], "2022-08-15T17:47:18Z");
+        }
+    }
+}