Combine domnet and sorta under evenmoreutils
This commit is contained in:
commit
686e3b8f33
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
/target
|
117
Cargo.lock
generated
Normal file
117
Cargo.lock
generated
Normal file
|
@ -0,0 +1,117 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "evenmoreutils"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"lazy_static",
|
||||||
|
"regex",
|
||||||
|
"thiserror",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazy_static"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.66"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.31"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.9.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.7.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.27"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thiserror"
|
||||||
|
version = "1.0.44"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
|
||||||
|
dependencies = [
|
||||||
|
"thiserror-impl",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thiserror-impl"
|
||||||
|
version = "1.0.44"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
|
11
Cargo.toml
Normal file
11
Cargo.toml
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
[package]
|
||||||
|
name = "evenmoreutils"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
authors = ["xenofem <xenofem@xeno.science>"]
|
||||||
|
license = "MIT"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
lazy_static = "1.4"
|
||||||
|
regex = "1.7"
|
||||||
|
thiserror = "1.0"
|
168
src/bin/domnet.rs
Normal file
168
src/bin/domnet.rs
Normal file
|
@ -0,0 +1,168 @@
|
||||||
|
use std::fmt;
|
||||||
|
use std::net::{AddrParseError, Ipv4Addr, Ipv6Addr};
|
||||||
|
use std::num::ParseIntError;
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
/// An IPv4 subnet, stored as the leading bits of its network address (most significant bit
/// first). The length of the vector is the CIDR prefix length.
struct V4Subnet(Vec<bool>);
|
||||||
|
/// An IPv6 subnet, stored as the leading bits of its network address (most significant bit
/// first). The length of the vector is the CIDR prefix length.
struct V6Subnet(Vec<bool>);
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
enum SubnetParseError {
|
||||||
|
#[error("failed to parse CIDR block")]
|
||||||
|
Cidr,
|
||||||
|
#[error("failed to parse IP address")]
|
||||||
|
Addr(#[from] AddrParseError),
|
||||||
|
#[error("failed to parse prefix length")]
|
||||||
|
PrefixLength(#[from] ParseIntError),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Packs the first `size` positions of `b` into an integer, most significant bit first.
///
/// Positions at or beyond `b.len()` count as `false`, so a bitstring shorter than `size`
/// is padded with zero bits at the low end of the result.
fn bitstring_to_int(b: &[bool], size: usize) -> u128 {
    (0..size).fold(0, |acc, pos| {
        let bit = matches!(b.get(pos), Some(true));
        // Shift the accumulated value up by one bit and append the next bit.
        acc * 2 + u128::from(bit)
    })
}
|
||||||
|
|
||||||
|
/// Extracts the leading `prefix_len` bits of a `size`-bit integer, most significant bit first.
///
/// `val` is interpreted as a `size`-bit wide value. A `prefix_len` larger than `size` is
/// clamped to `size`: there are no further bits to extract, and the previous behaviour was
/// an arithmetic underflow in the shift computation. Bit positions at or above 128 (only
/// possible when `size > 128`) cannot be set in a `u128` and are reported as `false`.
fn int_to_bitstring(val: u128, size: usize, prefix_len: usize) -> Vec<bool> {
    let prefix_len = prefix_len.min(size);
    (0..prefix_len)
        .map(|idx| {
            // `idx < prefix_len <= size`, so this subtraction cannot underflow.
            let shift = size - 1 - idx;
            // Guard the shift: shifting a u128 by 128 or more would overflow.
            shift < u128::BITS as usize && (val >> shift) & 1 == 1
        })
        .collect()
}
|
||||||
|
|
||||||
|
impl fmt::Display for V4Subnet {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
let addr = Ipv4Addr::from(bitstring_to_int(&self.0, 32) as u32);
|
||||||
|
write!(f, "{}/{}", addr, self.0.len())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FromStr for V4Subnet {
|
||||||
|
type Err = SubnetParseError;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
let (addr, len) = s.split_once('/').ok_or(SubnetParseError::Cidr)?;
|
||||||
|
let val = u32::from(Ipv4Addr::from_str(addr)?);
|
||||||
|
let prefix_len = usize::from_str(len)?;
|
||||||
|
Ok(V4Subnet(int_to_bitstring(val as u128, 32, prefix_len)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for V6Subnet {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
let addr = Ipv6Addr::from(bitstring_to_int(&self.0, 128));
|
||||||
|
write!(f, "{}/{}", addr, self.0.len())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FromStr for V6Subnet {
|
||||||
|
type Err = SubnetParseError;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
let (addr, len) = s.split_once('/').ok_or(SubnetParseError::Cidr)?;
|
||||||
|
let val = u128::from(Ipv6Addr::from_str(addr)?);
|
||||||
|
let prefix_len = usize::from_str(len)?;
|
||||||
|
Ok(V6Subnet(int_to_bitstring(val, 128, prefix_len)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finds the smallest set of subnets that complement the set of subnets provided.
///
/// Each subnet is a bit prefix; `size` is the full address width in bits (the maximum
/// prefix length the search will descend to). An empty `subnets` slice yields a single
/// empty prefix, i.e. the whole address space.
fn invert_subnets(subnets: &[Vec<bool>], size: usize) -> Vec<Vec<bool>> {
    invert_subnets_helper(&mut Vec::new(), subnets, size)
}

/// Finds the smallest set of subnets that complement, within `root`, the set of subnets
/// provided.
///
/// `root` is used as a scratch stack during the recursion and is restored to its original
/// contents before returning. Results are produced with the `false` branch before the
/// `true` branch at every level.
fn invert_subnets_helper(
    root: &mut Vec<bool>,
    subnets: &[Vec<bool>],
    size: usize,
) -> Vec<Vec<bool>> {
    if subnets.iter().any(|subnet| root.starts_with(subnet)) {
        // We're already within one of the provided subnets, so nothing here is in the
        // complement.
        return Vec::new();
    }

    if !subnets.iter().any(|subnet| subnet.starts_with(root)) {
        // No provided subnet lies inside `root`: the whole of `root` is in the complement.
        return vec![root.clone()];
    }

    // Some provided subnet is strictly inside `root`, so we need to narrow down further.
    if root.len() == size {
        // There's nowhere further to go.
        return Vec::new();
    }

    let mut results = Vec::new();
    // Left branch (`false`) first, then right branch (`true`).
    for bit in [false, true] {
        root.push(bit);
        results.extend(invert_subnets_helper(root, subnets, size));
        root.pop();
    }
    results
}
|
||||||
|
|
||||||
|
/// Prints the usage message to standard error and terminates the process with exit code 1.
///
/// `name` is the program name to show in the message; `domnet` is used when it is absent.
fn usage(name: Option<String>) {
    let program = match name.as_ref() {
        Some(given) => given.as_str(),
        None => "domnet",
    };
    eprintln!("usage: {} SUBNET ...", program);
    eprintln!("Calculate the inverse of a set of IP subnets.");
    eprintln!("Arguments must be all IPv4 subnets or all IPv6 subnets, in CIDR block format.");
    std::process::exit(1);
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let mut args = std::env::args();
|
||||||
|
let name = args.next();
|
||||||
|
let first = args.next();
|
||||||
|
|
||||||
|
let subnet = if let Some(s) = first {
|
||||||
|
s
|
||||||
|
} else {
|
||||||
|
return usage(name);
|
||||||
|
};
|
||||||
|
if let Ok(v4subnet) = V4Subnet::from_str(&subnet) {
|
||||||
|
let rest = args
|
||||||
|
.map(|s| V4Subnet::from_str(&s))
|
||||||
|
.collect::<Result<Vec<V4Subnet>, SubnetParseError>>();
|
||||||
|
let subnets = if let Ok(r) = rest {
|
||||||
|
std::iter::once(v4subnet)
|
||||||
|
.chain(r.into_iter())
|
||||||
|
.map(|s| s.0)
|
||||||
|
.collect::<Vec<Vec<bool>>>()
|
||||||
|
} else {
|
||||||
|
return usage(name);
|
||||||
|
};
|
||||||
|
let inverse = invert_subnets(&subnets, 32);
|
||||||
|
for subnet in inverse.into_iter() {
|
||||||
|
println!("{}", V4Subnet(subnet));
|
||||||
|
}
|
||||||
|
} else if let Ok(v6subnet) = V6Subnet::from_str(&subnet) {
|
||||||
|
let rest = args
|
||||||
|
.map(|s| V6Subnet::from_str(&s))
|
||||||
|
.collect::<Result<Vec<V6Subnet>, SubnetParseError>>();
|
||||||
|
let subnets = if let Ok(r) = rest {
|
||||||
|
std::iter::once(v6subnet)
|
||||||
|
.chain(r.into_iter())
|
||||||
|
.map(|s| s.0)
|
||||||
|
.collect::<Vec<Vec<bool>>>()
|
||||||
|
} else {
|
||||||
|
return usage(name);
|
||||||
|
};
|
||||||
|
let inverse = invert_subnets(&subnets, 128);
|
||||||
|
for subnet in inverse.into_iter() {
|
||||||
|
println!("{}", V6Subnet(subnet));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
usage(name);
|
||||||
|
}
|
||||||
|
}
|
81
src/bin/sorta.rs
Normal file
81
src/bin/sorta.rs
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
use std::io::{self, BufRead, Write};
|
||||||
|
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use regex::{Regex, escape};
|
||||||
|
|
||||||
|
lazy_static! {
    // Opening/closing bracket pairs; a bracketed group is treated as strippable decoration.
    static ref PARENS: Vec<(char, char)> = vec![('(', ')'), ('[', ']'), ('{', '}'), ('【', '】')];

    // Leading words that are skipped when they appear as whole words at the start of a line.
    static ref ARTICLES: Vec<&'static str> = vec!["a", "an", "the"];

    // Literal separator/decoration strings that are skipped at either end of a line.
    static ref CHAFF: Vec<&'static str> = vec!["-", "_", ".", "『", "』"];

    // File extensions (without the dot) that are skipped at the end of a line.
    static ref EXTENSIONS: Vec<&'static str> = vec!["mkv", "mp4", "webm"];
}
|
||||||
|
|
||||||
|
fn alternator_regex(choices: &Vec<&str>) -> String {
|
||||||
|
let mut result = String::new();
|
||||||
|
for (i, choice) in choices.iter().enumerate() {
|
||||||
|
if i > 0 {
|
||||||
|
result.push('|');
|
||||||
|
}
|
||||||
|
result.push_str(&escape(choice));
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> io::Result<()> {
|
||||||
|
let article_regex_raw = format!(r"\b({})\b", alternator_regex(&ARTICLES));
|
||||||
|
let extension_regex_raw = format!(r"\.({})", alternator_regex(&EXTENSIONS));
|
||||||
|
|
||||||
|
let mut space_paren_chaff_regex_raw = String::from(r"\s");
|
||||||
|
for (open, close) in PARENS.iter() {
|
||||||
|
let open = escape(&open.to_string());
|
||||||
|
let close = escape(&close.to_string());
|
||||||
|
space_paren_chaff_regex_raw.push_str(&format!("|{open}[^{close}]+{close}"));
|
||||||
|
}
|
||||||
|
for chaff in CHAFF.iter() {
|
||||||
|
space_paren_chaff_regex_raw.push('|');
|
||||||
|
space_paren_chaff_regex_raw.push_str(&escape(chaff));
|
||||||
|
}
|
||||||
|
|
||||||
|
let strip_start_regex = Regex::new(&format!("^(?i)({article_regex_raw}|{space_paren_chaff_regex_raw})*")).unwrap();
|
||||||
|
let strip_end_regex = Regex::new(&format!("(?i)({space_paren_chaff_regex_raw})*({extension_regex_raw})?$")).unwrap();
|
||||||
|
|
||||||
|
let mut results = Vec::new();
|
||||||
|
|
||||||
|
let stdin = io::stdin();
|
||||||
|
for line in stdin.lock().lines() {
|
||||||
|
let line = line?;
|
||||||
|
let strip_start_idx = strip_start_regex.find(&line).map(|m| m.end()).unwrap_or(0);
|
||||||
|
let strip_end_idx = strip_end_regex.find_at(&line, strip_start_idx).map(|m| m.start()).unwrap_or(line.len());
|
||||||
|
let item = ((&line[strip_start_idx..]).to_lowercase(), line, strip_start_idx, strip_end_idx);
|
||||||
|
let insertion_idx = match results.binary_search(&item) {
|
||||||
|
Ok(n) => n,
|
||||||
|
Err(n) => n,
|
||||||
|
};
|
||||||
|
results.insert(insertion_idx, item);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut stdout = io::stdout().lock();
|
||||||
|
for (_, line, strip_start_idx, strip_end_idx) in results.into_iter() {
|
||||||
|
writeln!(stdout, "{}\x1b[01;34m{}\x1b[0m{}", &line[..strip_start_idx], &line[strip_start_idx..strip_end_idx], &line[strip_end_idx..])?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
Loading…
Reference in a new issue