From 59c0518944d48abef7559f49a8e763bf279ac80c Mon Sep 17 00:00:00 2001 From: xenofem Date: Mon, 4 Sep 2023 15:31:00 -0400 Subject: [PATCH] 0.2.0: sorta: add option to align lines, make color an optional flag --- Cargo.lock | 183 ++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 3 +- src/bin/sorta.rs | 98 +++++++++++++++++-------- 3 files changed, 251 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6350fac..e692060 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12,14 +12,115 @@ dependencies = [ ] [[package]] -name = "evenmoreutils" -version = "0.1.0" +name = "anstream" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c" dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea" + +[[package]] +name = "anstyle-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "clap" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "evenmoreutils" +version = "0.2.0" +dependencies = [ + "clap", "lazy_static", "regex", "thiserror", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "lazy_static" version = "1.4.0" @@ -79,6 +180,12 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + [[package]] name = "syn" version = "2.0.27" @@ -115,3 +222,75 @@ name = "unicode-ident" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git a/Cargo.toml b/Cargo.toml index eb35553..72bdbd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,12 @@ [package] name = "evenmoreutils" -version = "0.1.0" +version = "0.2.0" edition = "2021" authors = ["xenofem "] license = "MIT" [dependencies] +clap = { version = "4.3", features = ["derive"] } lazy_static = "1.4" regex = "1.7" thiserror = "1.0" diff --git a/src/bin/sorta.rs b/src/bin/sorta.rs index 86078d2..cb70987 100644 --- a/src/bin/sorta.rs +++ b/src/bin/sorta.rs @@ -1,35 +1,38 @@ use std::io::{self, BufRead, Write}; +use clap::Parser; use lazy_static::lazy_static; -use regex::{Regex, escape}; +use regex::{escape, Regex}; + +const COLOR_START: &str = "\x1b[01;34m"; +const COLOR_END: &str = "\x1b[0m"; lazy_static! { - static ref PARENS: Vec<(char, char)> = vec![ - ('(', ')'), - ('[', ']'), - ('{', '}'), - ('【', '】'), - ]; - static ref ARTICLES: Vec<&'static str> = vec![ - "a", - "an", - "the", - ]; - static ref CHAFF: Vec<&'static str> = vec![ - "-", - "_", - ".", - "『", - "』", - ]; - static ref EXTENSIONS: Vec<&'static str> = vec![ - "mkv", - "mp4", - "webm", - ]; + static ref PARENS: Vec<(char, char)> = vec![('(', ')'), ('[', ']'), ('{', '}'), ('【', '】')]; + static ref ARTICLES: Vec<&'static str> = vec!["a", "an", "the"]; + static ref CHAFF: Vec<&'static str> = vec!["-", "_", ".", "『", "』"]; + static ref EXTENSIONS: Vec<&'static str> = vec!["mkv", "mp4", "webm"]; } -fn alternator_regex(choices: &Vec<&str>) -> String { +#[derive(Parser, Debug)] +#[clap(author, version)] +/// Sort lines based on the "important" part of each line +/// +/// sorta sorts the lines of stdin by stripping off prefixes such as +/// parenthesized descriptors, articles, etc, sorting based on the +/// stripped versions, and printing the original lines to stdout in +/// sorted order. +struct Cli { + /// Indent lines with spaces to align the important parts of each line + #[arg(short, long)] + align: bool, + + /// Color the important part of each line + #[arg(short, long)] + color: bool, +} + +fn alternator_regex(choices: &[&str]) -> String { let mut result = String::new(); for (i, choice) in choices.iter().enumerate() { if i > 0 { @@ -44,6 +47,8 @@ fn main() -> io::Result<()> { let article_regex_raw = format!(r"\b({})\b", alternator_regex(&ARTICLES)); let extension_regex_raw = format!(r"\.({})", alternator_regex(&EXTENSIONS)); + let cli = Cli::parse(); + let mut space_paren_chaff_regex_raw = String::from(r"\s"); for (open, close) in PARENS.iter() { let open = escape(&open.to_string()); @@ -55,8 +60,14 @@ fn main() -> io::Result<()> { space_paren_chaff_regex_raw.push_str(&escape(chaff)); } - let strip_start_regex = Regex::new(&format!("^(?i)({article_regex_raw}|{space_paren_chaff_regex_raw})*")).unwrap(); - let strip_end_regex = Regex::new(&format!("(?i)({space_paren_chaff_regex_raw})*({extension_regex_raw})?$")).unwrap(); + let strip_start_regex = Regex::new(&format!( + "^(?i)({article_regex_raw}|{space_paren_chaff_regex_raw})*" + )) + .unwrap(); + let strip_end_regex = Regex::new(&format!( + "(?i)({space_paren_chaff_regex_raw})*({extension_regex_raw})?$" + )) + .unwrap(); let mut results = Vec::new(); @@ -64,8 +75,16 @@ fn main() -> io::Result<()> { for line in stdin.lock().lines() { let line = line?; let strip_start_idx = strip_start_regex.find(&line).map(|m| m.end()).unwrap_or(0); - let strip_end_idx = strip_end_regex.find_at(&line, strip_start_idx).map(|m| m.start()).unwrap_or(line.len()); - let item = ((&line[strip_start_idx..]).to_lowercase(), line, strip_start_idx, strip_end_idx); + let strip_end_idx = strip_end_regex + .find_at(&line, strip_start_idx) + .map(|m| m.start()) + .unwrap_or(line.len()); + let item = ( + line[strip_start_idx..].to_lowercase(), + line, + strip_start_idx, + strip_end_idx, + ); let insertion_idx = match results.binary_search(&item) { Ok(n) => n, Err(n) => n, @@ -73,9 +92,28 @@ fn main() -> io::Result<()> { results.insert(insertion_idx, item); } + let alignment_width = if cli.align { + results + .iter() + .map(|(_, _, start_idx, _)| start_idx) + .max() + .copied() + .unwrap_or_default() + } else { + 0 + }; + let mut stdout = io::stdout().lock(); for (_, line, strip_start_idx, strip_end_idx) in results.into_iter() { - writeln!(stdout, "{}\x1b[01;34m{}\x1b[0m{}", &line[..strip_start_idx], &line[strip_start_idx..strip_end_idx], &line[strip_end_idx..])?; + writeln!( + stdout, + "{:>alignment_width$}{}{}{}{}", + &line[..strip_start_idx], + if cli.color { COLOR_START } else { "" }, + &line[strip_start_idx..strip_end_idx], + if cli.color { COLOR_END } else { "" }, + &line[strip_end_idx..], + )?; } Ok(()) }