120 lines
3.6 KiB
Rust
120 lines
3.6 KiB
Rust
use std::io::{self, BufRead, Write};
|
|
|
|
use clap::Parser;
|
|
use lazy_static::lazy_static;
|
|
use regex::{escape, Regex};
|
|
|
|
// ANSI SGR escape sequence (attributes 01;34 = bold + blue foreground) written
// immediately before the "important" part of a line when `--color` is set.
const COLOR_START: &str = "\x1b[01;34m";
|
|
// ANSI SGR reset sequence written immediately after the "important" part of a
// line when `--color` is set, so the rest of the line is printed normally.
const COLOR_END: &str = "\x1b[0m";
|
|
|
|
// Lookup tables describing what counts as "unimportant" text around a line.
//
// These are compile-time constants, so they are plain `const` slices rather
// than lazily-initialised heap `Vec`s: no runtime initialisation, no
// allocation, and `.iter()` / `&NAME` (coerced to a slice) keep working.

/// Opening/closing bracket pairs; a bracketed group is treated as strippable.
const PARENS: &[(char, char)] = &[('(', ')'), ('[', ']'), ('{', '}'), ('【', '】')];

/// Articles that are ignored (case-insensitively) at the start of a line.
const ARTICLES: &[&str] = &["a", "an", "the"];

/// Separator/decoration strings that are ignored at either end of a line.
const CHAFF: &[&str] = &["-", "_", ".", "『", "』"];

/// File extensions (without the leading dot) ignored at the end of a line.
const EXTENSIONS: &[&str] = &["mkv", "mp4", "webm"];
|
|
|
|
#[derive(Parser, Debug)]
|
|
#[clap(author, version)]
|
|
/// Sort lines based on the "important" part of each line
|
|
///
|
|
/// sorta sorts the lines of stdin by stripping off prefixes such as
|
|
/// parenthesized descriptors, articles, etc, sorting based on the
|
|
/// stripped versions, and printing the original lines to stdout in
|
|
/// sorted order.
|
|
struct Cli {
|
|
/// Indent lines with spaces to align the important parts of each line
|
|
#[arg(short, long)]
|
|
align: bool,
|
|
|
|
/// Color the important part of each line
|
|
#[arg(short, long)]
|
|
color: bool,
|
|
}
|
|
|
|
fn alternator_regex(choices: &[&str]) -> String {
|
|
let mut result = String::new();
|
|
for (i, choice) in choices.iter().enumerate() {
|
|
if i > 0 {
|
|
result.push('|');
|
|
}
|
|
result.push_str(&escape(choice));
|
|
}
|
|
result
|
|
}
|
|
|
|
fn main() -> io::Result<()> {
|
|
let article_regex_raw = format!(r"\b({})\b", alternator_regex(&ARTICLES));
|
|
let extension_regex_raw = format!(r"\.({})", alternator_regex(&EXTENSIONS));
|
|
|
|
let cli = Cli::parse();
|
|
|
|
let mut space_paren_chaff_regex_raw = String::from(r"\s");
|
|
for (open, close) in PARENS.iter() {
|
|
let open = escape(&open.to_string());
|
|
let close = escape(&close.to_string());
|
|
space_paren_chaff_regex_raw.push_str(&format!("|{open}[^{close}]+{close}"));
|
|
}
|
|
for chaff in CHAFF.iter() {
|
|
space_paren_chaff_regex_raw.push('|');
|
|
space_paren_chaff_regex_raw.push_str(&escape(chaff));
|
|
}
|
|
|
|
let strip_start_regex = Regex::new(&format!(
|
|
"^(?i)({article_regex_raw}|{space_paren_chaff_regex_raw})*"
|
|
))
|
|
.unwrap();
|
|
let strip_end_regex = Regex::new(&format!(
|
|
"(?i)({space_paren_chaff_regex_raw})*({extension_regex_raw})?$"
|
|
))
|
|
.unwrap();
|
|
|
|
let mut results = Vec::new();
|
|
|
|
let stdin = io::stdin();
|
|
for line in stdin.lock().lines() {
|
|
let line = line?;
|
|
let strip_start_idx = strip_start_regex.find(&line).map(|m| m.end()).unwrap_or(0);
|
|
let strip_end_idx = strip_end_regex
|
|
.find_at(&line, strip_start_idx)
|
|
.map(|m| m.start())
|
|
.unwrap_or(line.len());
|
|
let item = (
|
|
line[strip_start_idx..].to_lowercase(),
|
|
line,
|
|
strip_start_idx,
|
|
strip_end_idx,
|
|
);
|
|
let insertion_idx = match results.binary_search(&item) {
|
|
Ok(n) => n,
|
|
Err(n) => n,
|
|
};
|
|
results.insert(insertion_idx, item);
|
|
}
|
|
|
|
let alignment_width = if cli.align {
|
|
results
|
|
.iter()
|
|
.map(|(_, _, start_idx, _)| start_idx)
|
|
.max()
|
|
.copied()
|
|
.unwrap_or_default()
|
|
} else {
|
|
0
|
|
};
|
|
|
|
let mut stdout = io::stdout().lock();
|
|
for (_, line, strip_start_idx, strip_end_idx) in results.into_iter() {
|
|
writeln!(
|
|
stdout,
|
|
"{:>alignment_width$}{}{}{}{}",
|
|
&line[..strip_start_idx],
|
|
if cli.color { COLOR_START } else { "" },
|
|
&line[strip_start_idx..strip_end_idx],
|
|
if cli.color { COLOR_END } else { "" },
|
|
&line[strip_end_idx..],
|
|
)?;
|
|
}
|
|
Ok(())
|
|
}
|