evenmoreutils/src/bin/sorta.rs

120 lines
3.6 KiB
Rust

use std::io::{self, BufRead, Write};
use clap::Parser;
use lazy_static::lazy_static;
use regex::{escape, Regex};
/// ANSI SGR escape sequence emitted before the highlighted part of a line
/// (parameters `01;34`: bold + blue foreground).
const COLOR_START: &str = "\x1b[01;34m";
/// ANSI SGR escape sequence that resets all text attributes; emitted after the
/// highlighted part of a line.
const COLOR_END: &str = "\x1b[0m";
// Lookup tables that `main` turns into the prefix/suffix stripping regexes.
lazy_static! {
// (open, close) bracket pairs; `main` builds one pattern per pair that matches
// an opening bracket, one or more non-closing characters, then the closing bracket.
static ref PARENS: Vec<(char, char)> = vec![('(', ')'), ('[', ']'), ('{', '}'), ('【', '】')];
// Articles stripped from the start of a line; `main` matches them as whole
// words (`\b…\b`) and case-insensitively.
static ref ARTICLES: Vec<&'static str> = vec!["a", "an", "the"];
// Separator "chaff" stripped at either end of the important part.
// NOTE(review): the last two entries appear as empty strings here; they may
// originally have been invisible characters (e.g. zero-width spaces) that were
// lost in transit — confirm against the upstream file.
static ref CHAFF: Vec<&'static str> = vec!["-", "_", ".", "", ""];
// File extensions (without the dot) stripped from the end of a line; `main`
// requires a literal `.` in front of them.
static ref EXTENSIONS: Vec<&'static str> = vec!["mkv", "mp4", "webm"];
}
// Command-line interface definition, parsed via clap's derive API
// (`Cli::parse()` in `main`).
//
// NOTE: with clap's derive, the `///` doc comments on the struct and its fields
// are used as the `--help` text, so editing them changes user-visible output.
#[derive(Parser, Debug)]
#[clap(author, version)]
/// Sort lines based on the "important" part of each line
///
/// sorta sorts the lines of stdin by stripping off prefixes such as
/// parenthesized descriptors, articles, etc, sorting based on the
/// stripped versions, and printing the original lines to stdout in
/// sorted order.
struct Cli {
// `short, long` asks clap to derive both a short and a long flag from the
// field name (here `-a` / `--align`).
/// Indent lines with spaces to align the important parts of each line
#[arg(short, long)]
align: bool,
// Derived flags: `-c` / `--color`. When set, `main` wraps the important part
// of each line in COLOR_START / COLOR_END.
/// Color the important part of each line
#[arg(short, long)]
color: bool,
}
/// Builds the body of a regex alternation from literal `choices`.
///
/// Every choice is run through `regex::escape` so that it matches literally,
/// and the escaped choices are joined with `|`. No surrounding group is added,
/// and an empty slice produces an empty string.
fn alternator_regex(choices: &[&str]) -> String {
    let escaped: Vec<String> = choices.iter().map(|choice| escape(choice)).collect();
    escaped.join("|")
}
fn main() -> io::Result<()> {
let article_regex_raw = format!(r"\b({})\b", alternator_regex(&ARTICLES));
let extension_regex_raw = format!(r"\.({})", alternator_regex(&EXTENSIONS));
let cli = Cli::parse();
let mut space_paren_chaff_regex_raw = String::from(r"\s");
for (open, close) in PARENS.iter() {
let open = escape(&open.to_string());
let close = escape(&close.to_string());
space_paren_chaff_regex_raw.push_str(&format!("|{open}[^{close}]+{close}"));
}
for chaff in CHAFF.iter() {
space_paren_chaff_regex_raw.push('|');
space_paren_chaff_regex_raw.push_str(&escape(chaff));
}
let strip_start_regex = Regex::new(&format!(
"^(?i)({article_regex_raw}|{space_paren_chaff_regex_raw})*"
))
.unwrap();
let strip_end_regex = Regex::new(&format!(
"(?i)({space_paren_chaff_regex_raw})*({extension_regex_raw})?$"
))
.unwrap();
let mut results = Vec::new();
let stdin = io::stdin();
for line in stdin.lock().lines() {
let line = line?;
let strip_start_idx = strip_start_regex.find(&line).map(|m| m.end()).unwrap_or(0);
let strip_end_idx = strip_end_regex
.find_at(&line, strip_start_idx)
.map(|m| m.start())
.unwrap_or(line.len());
let item = (
line[strip_start_idx..].to_lowercase(),
line,
strip_start_idx,
strip_end_idx,
);
let insertion_idx = match results.binary_search(&item) {
Ok(n) => n,
Err(n) => n,
};
results.insert(insertion_idx, item);
}
let alignment_width = if cli.align {
results
.iter()
.map(|(_, _, start_idx, _)| start_idx)
.max()
.copied()
.unwrap_or_default()
} else {
0
};
let mut stdout = io::stdout().lock();
for (_, line, strip_start_idx, strip_end_idx) in results.into_iter() {
writeln!(
stdout,
"{:>alignment_width$}{}{}{}{}",
&line[..strip_start_idx],
if cli.color { COLOR_START } else { "" },
&line[strip_start_idx..strip_end_idx],
if cli.color { COLOR_END } else { "" },
&line[strip_end_idx..],
)?;
}
Ok(())
}