commit 686e3b8f3351a1c67059386d9934462dc5bd8b9d Author: xenofem Date: Fri Jul 21 18:50:10 2023 -0400 Combine domnet and sorta under evenmoreutils diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..6350fac --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,117 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +dependencies = [ + "memchr", +] + +[[package]] +name = "evenmoreutils" +version = "0.1.0" +dependencies = [ + "lazy_static", + "regex", + "thiserror", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +dependencies = [ + "aho-corasick", + "memchr", + 
"regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" + +[[package]] +name = "syn" +version = "2.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..eb35553 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "evenmoreutils" +version = "0.1.0" +edition = "2021" +authors = ["xenofem "] +license = "MIT" + +[dependencies] +lazy_static = "1.4" +regex = "1.7" +thiserror = "1.0" diff --git a/src/bin/domnet.rs b/src/bin/domnet.rs new file mode 100644 index 0000000..8db950d --- /dev/null +++ b/src/bin/domnet.rs @@ -0,0 +1,168 @@ +use std::fmt; +use std::net::{AddrParseError, 
// domnet: print the smallest set of subnets that covers every address NOT
// covered by the subnets given on the command line.

use std::error::Error;
use std::fmt;
use std::net::{AddrParseError, Ipv4Addr, Ipv6Addr};
use std::num::ParseIntError;
use std::str::FromStr;

/// Width of an IPv4 address in bits.
const V4_BITS: usize = 32;
/// Width of an IPv6 address in bits.
const V6_BITS: usize = 128;

/// An IPv4 subnet, stored as its prefix bits (most significant bit first).
/// The vector length is the prefix length.
struct V4Subnet(Vec<bool>);

/// An IPv6 subnet, stored as its prefix bits (most significant bit first).
/// The vector length is the prefix length.
struct V6Subnet(Vec<bool>);

/// Reasons a `ADDRESS/LEN` string can fail to parse as a subnet.
#[derive(Debug)]
enum SubnetParseError {
    /// The input did not contain a `/` separator.
    Cidr,
    /// The part before the `/` was not a valid address of the expected family.
    Addr(AddrParseError),
    /// The part after the `/` was not a non-negative integer.
    PrefixLength(ParseIntError),
    /// The prefix length is larger than the address width.
    PrefixTooLong { len: usize, max: usize },
}

impl fmt::Display for SubnetParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            SubnetParseError::Cidr => f.write_str("failed to parse CIDR block"),
            SubnetParseError::Addr(_) => f.write_str("failed to parse IP address"),
            SubnetParseError::PrefixLength(_) => f.write_str("failed to parse prefix length"),
            SubnetParseError::PrefixTooLong { len, max } => {
                write!(f, "prefix length {} exceeds {} bits", len, max)
            }
        }
    }
}

impl Error for SubnetParseError {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            SubnetParseError::Addr(e) => Some(e),
            SubnetParseError::PrefixLength(e) => Some(e),
            SubnetParseError::Cidr | SubnetParseError::PrefixTooLong { .. } => None,
        }
    }
}

impl From<AddrParseError> for SubnetParseError {
    fn from(e: AddrParseError) -> Self {
        SubnetParseError::Addr(e)
    }
}

impl From<ParseIntError> for SubnetParseError {
    fn from(e: ParseIntError) -> Self {
        SubnetParseError::PrefixLength(e)
    }
}

/// Packs a bit string into an integer of `size` bits, most significant bit
/// first. Positions past the end of `b` are treated as zero, so a short
/// prefix yields the network address with all host bits cleared.
fn bitstring_to_int(b: &[bool], size: usize) -> u128 {
    (0..size).fold(0u128, |acc, idx| {
        (acc << 1) | u128::from(b.get(idx).copied().unwrap_or(false))
    })
}

/// Extracts the top `prefix_len` bits of a `size`-bit integer, most
/// significant bit first.
///
/// `prefix_len` is clamped to `size`: asking for more bits than the address
/// has used to underflow `size - 1 - idx` and panic.
fn int_to_bitstring(val: u128, size: usize, prefix_len: usize) -> Vec<bool> {
    (0..prefix_len.min(size))
        .map(|idx| {
            let shift = size - 1 - idx;
            // A u128 has no bits above position 127; read them as zero
            // instead of overflowing the shift.
            shift < 128 && (val >> shift) & 1 == 1
        })
        .collect()
}

/// Parses the part after the `/` and checks it against the address width.
fn parse_prefix_len(len: &str, max: usize) -> Result<usize, SubnetParseError> {
    let len = usize::from_str(len)?;
    if len > max {
        return Err(SubnetParseError::PrefixTooLong { len, max });
    }
    Ok(len)
}

impl fmt::Display for V4Subnet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only 32 bits are folded in, so the value always fits in a u32.
        let addr = Ipv4Addr::from(bitstring_to_int(&self.0, V4_BITS) as u32);
        write!(f, "{}/{}", addr, self.0.len())
    }
}

impl FromStr for V4Subnet {
    type Err = SubnetParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (addr, len) = s.split_once('/').ok_or(SubnetParseError::Cidr)?;
        let val = u32::from(Ipv4Addr::from_str(addr)?);
        let prefix_len = parse_prefix_len(len, V4_BITS)?;
        Ok(V4Subnet(int_to_bitstring(u128::from(val), V4_BITS, prefix_len)))
    }
}

impl fmt::Display for V6Subnet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let addr = Ipv6Addr::from(bitstring_to_int(&self.0, V6_BITS));
        write!(f, "{}/{}", addr, self.0.len())
    }
}

impl FromStr for V6Subnet {
    type Err = SubnetParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (addr, len) = s.split_once('/').ok_or(SubnetParseError::Cidr)?;
        let val = u128::from(Ipv6Addr::from_str(addr)?);
        let prefix_len = parse_prefix_len(len, V6_BITS)?;
        Ok(V6Subnet(int_to_bitstring(val, V6_BITS, prefix_len)))
    }
}

/// Finds the smallest set of subnets that complement the set of subnets provided.
///
/// Each subnet is a prefix bit string; `size` is the address width in bits.
/// Results come out in ascending address order.
fn invert_subnets(subnets: &[Vec<bool>], size: usize) -> Vec<Vec<bool>> {
    invert_subnets_helper(&mut Vec::new(), subnets, size)
}

/// Finds the smallest set of subnets that complement, within `root`, the set
/// of subnets provided. `root` is restored to its original contents on return.
fn invert_subnets_helper(
    root: &mut Vec<bool>,
    subnets: &[Vec<bool>],
    size: usize,
) -> Vec<Vec<bool>> {
    // `root` lies entirely inside one of the provided subnets: nothing to emit.
    if subnets.iter().any(|s| root.starts_with(s)) {
        return Vec::new();
    }

    // No provided subnet lies inside `root`: the whole of `root` is free.
    if !subnets.iter().any(|s| s.starts_with(root.as_slice())) {
        return vec![root.clone()];
    }

    // `root` is partially covered, so split it. A full-width address cannot
    // be split any further.
    if root.len() >= size {
        return Vec::new();
    }

    let mut results = Vec::new();
    // Left (0) branch first, then right (1), to keep output in address order.
    for bit in [false, true] {
        root.push(bit);
        results.append(&mut invert_subnets_helper(root, subnets, size));
        root.pop();
    }
    results
}

/// Prints the usage text to stderr and exits with status 1.
fn usage(name: Option<String>) -> ! {
    eprintln!("usage: {} SUBNET ...", name.as_deref().unwrap_or("domnet"));
    eprintln!("Calculate the inverse of a set of IP subnets.");
    eprintln!("Arguments must be all IPv4 subnets or all IPv6 subnets, in CIDR block format.");
    std::process::exit(1)
}

/// Parses every remaining argument as the same subnet type as `first`,
/// inverts the combined set and prints one subnet per line.
///
/// All arguments are parsed before anything is printed, so a bad argument
/// never produces partial output.
fn invert_and_print<S, I>(
    first: S,
    rest: I,
    size: usize,
    wrap: fn(Vec<bool>) -> S,
    unwrap: fn(S) -> Vec<bool>,
) -> Result<(), SubnetParseError>
where
    S: FromStr<Err = SubnetParseError> + fmt::Display,
    I: Iterator<Item = String>,
{
    let mut subnets = vec![unwrap(first)];
    for arg in rest {
        subnets.push(unwrap(arg.parse::<S>()?));
    }
    for subnet in invert_subnets(&subnets, size) {
        println!("{}", wrap(subnet));
    }
    Ok(())
}

fn main() {
    let mut args = std::env::args();
    let name = args.next();
    let first = match args.next() {
        Some(s) => s,
        None => usage(name),
    };

    // The first argument decides the address family for all the others.
    let outcome = if let Ok(v4) = first.parse::<V4Subnet>() {
        invert_and_print(v4, args, V4_BITS, V4Subnet, |s: V4Subnet| s.0)
    } else if let Ok(v6) = first.parse::<V6Subnet>() {
        invert_and_print(v6, args, V6_BITS, V6Subnet, |s: V6Subnet| s.0)
    } else {
        usage(name)
    };

    if outcome.is_err() {
        usage(name);
    }
}
std::iter::once(v6subnet) + .chain(r.into_iter()) + .map(|s| s.0) + .collect::>>() + } else { + return usage(name); + }; + let inverse = invert_subnets(&subnets, 128); + for subnet in inverse.into_iter() { + println!("{}", V6Subnet(subnet)); + } + } else { + usage(name); + } +} diff --git a/src/bin/sorta.rs b/src/bin/sorta.rs new file mode 100644 index 0000000..86078d2 --- /dev/null +++ b/src/bin/sorta.rs @@ -0,0 +1,81 @@ +use std::io::{self, BufRead, Write}; + +use lazy_static::lazy_static; +use regex::{Regex, escape}; + +lazy_static! { + static ref PARENS: Vec<(char, char)> = vec![ + ('(', ')'), + ('[', ']'), + ('{', '}'), + ('【', '】'), + ]; + static ref ARTICLES: Vec<&'static str> = vec![ + "a", + "an", + "the", + ]; + static ref CHAFF: Vec<&'static str> = vec![ + "-", + "_", + ".", + "『", + "』", + ]; + static ref EXTENSIONS: Vec<&'static str> = vec![ + "mkv", + "mp4", + "webm", + ]; +} + +fn alternator_regex(choices: &Vec<&str>) -> String { + let mut result = String::new(); + for (i, choice) in choices.iter().enumerate() { + if i > 0 { + result.push('|'); + } + result.push_str(&escape(choice)); + } + result +} + +fn main() -> io::Result<()> { + let article_regex_raw = format!(r"\b({})\b", alternator_regex(&ARTICLES)); + let extension_regex_raw = format!(r"\.({})", alternator_regex(&EXTENSIONS)); + + let mut space_paren_chaff_regex_raw = String::from(r"\s"); + for (open, close) in PARENS.iter() { + let open = escape(&open.to_string()); + let close = escape(&close.to_string()); + space_paren_chaff_regex_raw.push_str(&format!("|{open}[^{close}]+{close}")); + } + for chaff in CHAFF.iter() { + space_paren_chaff_regex_raw.push('|'); + space_paren_chaff_regex_raw.push_str(&escape(chaff)); + } + + let strip_start_regex = Regex::new(&format!("^(?i)({article_regex_raw}|{space_paren_chaff_regex_raw})*")).unwrap(); + let strip_end_regex = Regex::new(&format!("(?i)({space_paren_chaff_regex_raw})*({extension_regex_raw})?$")).unwrap(); + + let mut results = Vec::new(); + + 
let stdin = io::stdin(); + for line in stdin.lock().lines() { + let line = line?; + let strip_start_idx = strip_start_regex.find(&line).map(|m| m.end()).unwrap_or(0); + let strip_end_idx = strip_end_regex.find_at(&line, strip_start_idx).map(|m| m.start()).unwrap_or(line.len()); + let item = ((&line[strip_start_idx..]).to_lowercase(), line, strip_start_idx, strip_end_idx); + let insertion_idx = match results.binary_search(&item) { + Ok(n) => n, + Err(n) => n, + }; + results.insert(insertion_idx, item); + } + + let mut stdout = io::stdout().lock(); + for (_, line, strip_start_idx, strip_end_idx) in results.into_iter() { + writeln!(stdout, "{}\x1b[01;34m{}\x1b[0m{}", &line[..strip_start_idx], &line[strip_start_idx..strip_end_idx], &line[strip_end_idx..])?; + } + Ok(()) +}