Combine domnet and sorta under evenmoreutils

This commit is contained in:
xenofem 2023-07-21 18:50:10 -04:00
commit 686e3b8f33
5 changed files with 378 additions and 0 deletions

81
src/bin/sorta.rs Normal file
View file

@ -0,0 +1,81 @@
use std::io::{self, BufRead, Write};
use lazy_static::lazy_static;
use regex::{Regex, escape};
// Lookup tables that `main` turns into the start- and end-stripping regexes.
// Every entry is passed through `regex::escape` before use, so entries are
// matched literally; both compiled regexes carry the `(?i)` flag.
lazy_static! {
// Opening/closing delimiter pairs. `main` strips an opening delimiter, one or
// more characters that are not the closing delimiter, and the closing delimiter.
static ref PARENS: Vec<(char, char)> = vec![
('(', ')'),
('[', ']'),
('{', '}'),
('【', '】'),
];
// Leading words to skip. `main` wraps these in `\b(...)\b` and uses them only
// in the start-of-line regex.
static ref ARTICLES: Vec<&'static str> = vec![
"a",
"an",
"the",
];
// Literal filler strings that `main` strips at both the start and the end of a line.
static ref CHAFF: Vec<&'static str> = vec![
"-",
"_",
".",
// NOTE(review): the next two entries read as empty strings here. They may
// originally have held characters that do not render or that were lost in
// extraction -- confirm against the repository before relying on them.
"",
"",
];
// File extensions. `main` matches them as an optional `\.(...)` suffix
// immediately before the end of the line.
static ref EXTENSIONS: Vec<&'static str> = vec![
"mkv",
"mp4",
"webm",
];
}
/// Builds the body of a regex alternation that matches any one of `choices`
/// literally.
///
/// Each choice is run through `regex::escape`, then the escaped pieces are
/// joined with `|`. The result is NOT wrapped in a group; callers add their own
/// parentheses. An empty slice yields an empty string.
///
/// Takes a slice rather than `&Vec<_>` so any contiguous collection can be
/// passed; existing `&Vec<&str>` call sites keep working through deref coercion.
fn alternator_regex(choices: &[&str]) -> String {
    choices
        .iter()
        .map(|choice| escape(choice))
        .collect::<Vec<_>>()
        .join("|")
}
fn main() -> io::Result<()> {
let article_regex_raw = format!(r"\b({})\b", alternator_regex(&ARTICLES));
let extension_regex_raw = format!(r"\.({})", alternator_regex(&EXTENSIONS));
let mut space_paren_chaff_regex_raw = String::from(r"\s");
for (open, close) in PARENS.iter() {
let open = escape(&open.to_string());
let close = escape(&close.to_string());
space_paren_chaff_regex_raw.push_str(&format!("|{open}[^{close}]+{close}"));
}
for chaff in CHAFF.iter() {
space_paren_chaff_regex_raw.push('|');
space_paren_chaff_regex_raw.push_str(&escape(chaff));
}
let strip_start_regex = Regex::new(&format!("^(?i)({article_regex_raw}|{space_paren_chaff_regex_raw})*")).unwrap();
let strip_end_regex = Regex::new(&format!("(?i)({space_paren_chaff_regex_raw})*({extension_regex_raw})?$")).unwrap();
let mut results = Vec::new();
let stdin = io::stdin();
for line in stdin.lock().lines() {
let line = line?;
let strip_start_idx = strip_start_regex.find(&line).map(|m| m.end()).unwrap_or(0);
let strip_end_idx = strip_end_regex.find_at(&line, strip_start_idx).map(|m| m.start()).unwrap_or(line.len());
let item = ((&line[strip_start_idx..]).to_lowercase(), line, strip_start_idx, strip_end_idx);
let insertion_idx = match results.binary_search(&item) {
Ok(n) => n,
Err(n) => n,
};
results.insert(insertion_idx, item);
}
let mut stdout = io::stdout().lock();
for (_, line, strip_start_idx, strip_end_idx) in results.into_iter() {
writeln!(stdout, "{}\x1b[01;34m{}\x1b[0m{}", &line[..strip_start_idx], &line[strip_start_idx..strip_end_idx], &line[strip_end_idx..])?;
}
Ok(())
}