Combine domnet and sorta under evenmoreutils

main
xenofem 2023-07-21 18:50:10 -04:00
commit 686e3b8f33
5 changed files with 378 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

117
Cargo.lock generated Normal file
View File

@ -0,0 +1,117 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
[[package]]
name = "evenmoreutils"
version = "0.1.0"
dependencies = [
"lazy_static",
"regex",
"thiserror",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "proc-macro2"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
[[package]]
name = "syn"
version = "2.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"

11
Cargo.toml Normal file
View File

@ -0,0 +1,11 @@
[package]
name = "evenmoreutils"
version = "0.1.0"
edition = "2021"
authors = ["xenofem <xenofem@xeno.science>"]
license = "MIT"
[dependencies]
lazy_static = "1.4"
regex = "1.7"
thiserror = "1.0"

168
src/bin/domnet.rs Normal file
View File

@ -0,0 +1,168 @@
use std::fmt;
use std::net::{AddrParseError, Ipv4Addr, Ipv6Addr};
use std::num::ParseIntError;
use std::str::FromStr;
use thiserror::Error;
/// An IPv4 subnet stored as its prefix bits, most significant bit first.
/// The length of the vector is the prefix length printed by `Display`.
struct V4Subnet(Vec<bool>);
/// An IPv6 subnet stored as its prefix bits, most significant bit first.
/// The length of the vector is the prefix length printed by `Display`.
struct V6Subnet(Vec<bool>);
/// Errors produced while parsing a subnet from `ADDR/LEN` text.
#[derive(Error, Debug)]
enum SubnetParseError {
/// The input did not contain the `/` separating address and prefix length.
#[error("failed to parse CIDR block")]
Cidr,
/// The part before the `/` was not a valid IP address of the expected family.
#[error("failed to parse IP address")]
Addr(#[from] AddrParseError),
/// The part after the `/` was not a valid unsigned integer.
#[error("failed to parse prefix length")]
PrefixLength(#[from] ParseIntError),
}
/// Packs a most-significant-bit-first bit string into an integer that is `size` bits wide.
///
/// Positions at or beyond `b.len()` are treated as zero, so a short prefix is
/// right-padded with zero bits up to `size`. Bits of `b` beyond `size` are ignored.
fn bitstring_to_int(b: &[bool], size: usize) -> u128 {
    (0..size).fold(0, |acc, pos| {
        // A missing position (past the end of the prefix) counts as a zero bit.
        let bit = matches!(b.get(pos), Some(true));
        acc * 2 + u128::from(bit)
    })
}
/// Extracts the leading `prefix_len` bits of a `size`-bit integer, most significant bit first.
///
/// `val` is interpreted as a `size`-bit quantity (bit `size - 1` is the first bit returned).
/// A `prefix_len` larger than `size` is clamped to `size`: there are no further bits to
/// extract, and without the clamp `size - 1 - idx` would underflow (a panic in debug builds,
/// a bogus shift amount in release builds) and the result could grow without bound.
fn int_to_bitstring(val: u128, size: usize, prefix_len: usize) -> Vec<bool> {
    let bit_count = prefix_len.min(size);
    (0..bit_count)
        .map(|idx| (val >> (size - 1 - idx)) & 1 == 1)
        .collect()
}
impl fmt::Display for V4Subnet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let addr = Ipv4Addr::from(bitstring_to_int(&self.0, 32) as u32);
write!(f, "{}/{}", addr, self.0.len())
}
}
impl FromStr for V4Subnet {
    type Err = SubnetParseError;

    /// Parses an IPv4 subnet written as `ADDR/LEN`.
    ///
    /// Only the first `LEN` bits of `ADDR` are kept; any host bits are discarded.
    ///
    /// # Errors
    ///
    /// * `SubnetParseError::Cidr` if there is no `/`, or if `LEN` exceeds 32.
    /// * `SubnetParseError::Addr` if `ADDR` is not a valid IPv4 address.
    /// * `SubnetParseError::PrefixLength` if `LEN` is not an unsigned integer.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (addr, len) = s.split_once('/').ok_or(SubnetParseError::Cidr)?;
        let val = u32::from(Ipv4Addr::from_str(addr)?);
        let prefix_len = usize::from_str(len)?;
        // An IPv4 prefix cannot be longer than the address itself. Reject it here instead
        // of letting the bit extraction run past the end of the 32-bit value.
        if prefix_len > 32 {
            return Err(SubnetParseError::Cidr);
        }
        Ok(V4Subnet(int_to_bitstring(u128::from(val), 32, prefix_len)))
    }
}
impl fmt::Display for V6Subnet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let addr = Ipv6Addr::from(bitstring_to_int(&self.0, 128));
write!(f, "{}/{}", addr, self.0.len())
}
}
impl FromStr for V6Subnet {
    type Err = SubnetParseError;

    /// Parses an IPv6 subnet written as `ADDR/LEN`.
    ///
    /// Only the first `LEN` bits of `ADDR` are kept; any host bits are discarded.
    ///
    /// # Errors
    ///
    /// * `SubnetParseError::Cidr` if there is no `/`, or if `LEN` exceeds 128.
    /// * `SubnetParseError::Addr` if `ADDR` is not a valid IPv6 address.
    /// * `SubnetParseError::PrefixLength` if `LEN` is not an unsigned integer.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let (addr, len) = s.split_once('/').ok_or(SubnetParseError::Cidr)?;
        let val = u128::from(Ipv6Addr::from_str(addr)?);
        let prefix_len = usize::from_str(len)?;
        // An IPv6 prefix cannot be longer than the address itself. Reject it here instead
        // of letting the bit extraction run past the end of the 128-bit value.
        if prefix_len > 128 {
            return Err(SubnetParseError::Cidr);
        }
        Ok(V6Subnet(int_to_bitstring(val, 128, prefix_len)))
    }
}
/// Computes the smallest set of prefixes that together cover exactly the part of the
/// `size`-bit address space not covered by `subnets`.
///
/// Each subnet, in the input and in the result, is a most-significant-bit-first prefix.
fn invert_subnets(subnets: &Vec<Vec<bool>>, size: usize) -> Vec<Vec<bool>> {
    // Start the search from the empty prefix, i.e. the whole address space.
    let mut whole_space = Vec::new();
    invert_subnets_helper(&mut whole_space, subnets, size)
}
// Computes the smallest set of prefixes that cover, within the prefix `root`, everything
// that none of `subnets` covers. `root` is used as a scratch stack: bits are pushed and
// popped during recursion and it is left unchanged on return.
fn invert_subnets_helper(
    root: &mut Vec<bool>,
    subnets: &Vec<Vec<bool>>,
    size: usize,
) -> Vec<Vec<bool>> {
    // `root` lies entirely inside one of the given subnets: nothing to report.
    let covered = subnets.iter().any(|subnet| root.starts_with(subnet));
    if covered {
        return Vec::new();
    }

    // No given subnet lies inside `root`, so all of `root` belongs to the complement.
    let overlaps = subnets.iter().any(|subnet| subnet.starts_with(root));
    if !overlaps {
        return vec![root.clone()];
    }

    // Some subnet is strictly narrower than `root`, but there are no bits left to split on.
    if root.len() == size {
        return Vec::new();
    }

    // Split on the next bit: zero branch first, then one branch.
    let mut complement = Vec::new();
    for bit in [false, true] {
        root.push(bit);
        complement.extend(invert_subnets_helper(root, subnets, size));
        root.pop();
    }
    complement
}
/// Prints the usage message to stderr and terminates the process with exit status 1.
///
/// `name` is the program name to show; when absent, `domnet` is used.
fn usage(name: Option<String>) {
    let program = match &name {
        Some(given) => given.as_str(),
        None => "domnet",
    };
    eprintln!("usage: {} SUBNET ...", program);
    eprintln!("Calculate the inverse of a set of IP subnets.");
    eprintln!("Arguments must be all IPv4 subnets or all IPv6 subnets, in CIDR block format.");
    std::process::exit(1);
}
fn main() {
let mut args = std::env::args();
let name = args.next();
let first = args.next();
let subnet = if let Some(s) = first {
s
} else {
return usage(name);
};
if let Ok(v4subnet) = V4Subnet::from_str(&subnet) {
let rest = args
.map(|s| V4Subnet::from_str(&s))
.collect::<Result<Vec<V4Subnet>, SubnetParseError>>();
let subnets = if let Ok(r) = rest {
std::iter::once(v4subnet)
.chain(r.into_iter())
.map(|s| s.0)
.collect::<Vec<Vec<bool>>>()
} else {
return usage(name);
};
let inverse = invert_subnets(&subnets, 32);
for subnet in inverse.into_iter() {
println!("{}", V4Subnet(subnet));
}
} else if let Ok(v6subnet) = V6Subnet::from_str(&subnet) {
let rest = args
.map(|s| V6Subnet::from_str(&s))
.collect::<Result<Vec<V6Subnet>, SubnetParseError>>();
let subnets = if let Ok(r) = rest {
std::iter::once(v6subnet)
.chain(r.into_iter())
.map(|s| s.0)
.collect::<Vec<Vec<bool>>>()
} else {
return usage(name);
};
let inverse = invert_subnets(&subnets, 128);
for subnet in inverse.into_iter() {
println!("{}", V6Subnet(subnet));
}
} else {
usage(name);
}
}

81
src/bin/sorta.rs Normal file
View File

@ -0,0 +1,81 @@
use std::io::{self, BufRead, Write};
use lazy_static::lazy_static;
use regex::{Regex, escape};
// Lookup tables used by `main` to build the regexes that decide which parts of a line
// are ignored for sorting.
lazy_static! {
// Opening/closing bracket pairs; a bracketed group `open ... close` counts as strippable.
static ref PARENS: Vec<(char, char)> = vec![
('(', ')'),
('[', ']'),
('{', '}'),
('【', '】'),
];
// Leading words (matched case-insensitively, on word boundaries) skipped at the start.
static ref ARTICLES: Vec<&'static str> = vec![
"a",
"an",
"the",
];
// Literal filler strings that may be stripped from either end of a line.
// NOTE(review): the last two entries appear to be empty strings in this view; confirm
// whether they were meant to hold characters that did not survive copying.
static ref CHAFF: Vec<&'static str> = vec![
"-",
"_",
".",
"",
"",
];
// File extensions (without the dot) that may be stripped from the end of a line.
static ref EXTENSIONS: Vec<&'static str> = vec![
"mkv",
"mp4",
"webm",
];
}
/// Builds a regex alternation (`a|b|c`) that matches any one of `choices` literally.
///
/// Every choice is passed through `escape`, so regex metacharacters in it have no special
/// meaning. The result is not wrapped in a group; callers add their own parentheses.
fn alternator_regex(choices: &Vec<&str>) -> String {
    choices
        .iter()
        .map(|choice| escape(choice))
        .collect::<Vec<String>>()
        .join("|")
}
/// Reads lines from stdin, sorts them while ignoring leading articles, whitespace,
/// bracketed groups and filler characters, and writes them to stdout with the significant
/// middle part of each line highlighted using ANSI colour codes.
///
/// The sort key is the lower-cased line with the leading strippable part removed; ties are
/// broken by the original line.
///
/// # Errors
///
/// Returns any I/O error raised while reading stdin or writing stdout.
fn main() -> io::Result<()> {
    let article_regex_raw = format!(r"\b({})\b", alternator_regex(&ARTICLES));
    let extension_regex_raw = format!(r"\.({})", alternator_regex(&EXTENSIONS));

    // Alternation of everything that counts as ignorable filler: whitespace, bracketed
    // groups, and the literal chaff strings.
    let mut space_paren_chaff_regex_raw = String::from(r"\s");
    for (open, close) in PARENS.iter() {
        let open = escape(&open.to_string());
        let close = escape(&close.to_string());
        space_paren_chaff_regex_raw.push_str(&format!("|{open}[^{close}]+{close}"));
    }
    for chaff in CHAFF.iter() {
        space_paren_chaff_regex_raw.push('|');
        space_paren_chaff_regex_raw.push_str(&escape(chaff));
    }

    // Both patterns are assembled from escaped constants, so failing to compile is a bug.
    let strip_start_regex = Regex::new(&format!(
        "^(?i)({article_regex_raw}|{space_paren_chaff_regex_raw})*"
    ))
    .expect("start-stripping regex is built from escaped constants");
    let strip_end_regex = Regex::new(&format!(
        "(?i)({space_paren_chaff_regex_raw})*({extension_regex_raw})?$"
    ))
    .expect("end-stripping regex is built from escaped constants");

    let mut results = Vec::new();
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        let line = line?;
        let strip_start_idx = strip_start_regex
            .find(&line)
            .map(|m| m.end())
            .unwrap_or(0);
        // Search for the trailing filler only after the stripped prefix, so the end index
        // can never precede the start index.
        let strip_end_idx = strip_end_regex
            .find_at(&line, strip_start_idx)
            .map(|m| m.start())
            .unwrap_or(line.len());
        let sort_key = line[strip_start_idx..].to_lowercase();
        results.push((sort_key, line, strip_start_idx, strip_end_idx));
    }

    // Sort once at the end rather than inserting each line into an already-sorted vector,
    // which shifts elements on every insert. Entries that compare equal are identical
    // tuples, so an unstable sort yields exactly the same output order.
    results.sort_unstable();

    let mut stdout = io::stdout().lock();
    for (_, line, strip_start_idx, strip_end_idx) in results {
        writeln!(
            stdout,
            "{}\x1b[01;34m{}\x1b[0m{}",
            &line[..strip_start_idx],
            &line[strip_start_idx..strip_end_idx],
            &line[strip_end_idx..]
        )?;
    }
    Ok(())
}