0.2.0: sorta: add option to align lines, make color an optional flag

This commit is contained in:
xenofem 2023-09-04 15:31:00 -04:00
parent 4bbaab46dd
commit 59c0518944
3 changed files with 251 additions and 33 deletions

183
Cargo.lock generated
View file

@ -12,14 +12,115 @@ dependencies = [
] ]
[[package]] [[package]]
name = "evenmoreutils" name = "anstream"
version = "0.1.0" version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c"
dependencies = [ dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea"
[[package]]
name = "anstyle-parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "clap"
version = "4.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961"
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "evenmoreutils"
version = "0.2.0"
dependencies = [
"clap",
"lazy_static", "lazy_static",
"regex", "regex",
"thiserror", "thiserror",
] ]
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.4.0" version = "1.4.0"
@ -79,6 +180,12 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.27" version = "2.0.27"
@ -115,3 +222,75 @@ name = "unicode-ident"
version = "1.0.11" version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"

View file

@ -1,11 +1,12 @@
[package] [package]
name = "evenmoreutils" name = "evenmoreutils"
version = "0.1.0" version = "0.2.0"
edition = "2021" edition = "2021"
authors = ["xenofem <xenofem@xeno.science>"] authors = ["xenofem <xenofem@xeno.science>"]
license = "MIT" license = "MIT"
[dependencies] [dependencies]
clap = { version = "4.3", features = ["derive"] }
lazy_static = "1.4" lazy_static = "1.4"
regex = "1.7" regex = "1.7"
thiserror = "1.0" thiserror = "1.0"

View file

@ -1,35 +1,38 @@
use std::io::{self, BufRead, Write}; use std::io::{self, BufRead, Write};
use clap::Parser;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use regex::{Regex, escape}; use regex::{escape, Regex};
const COLOR_START: &str = "\x1b[01;34m";
const COLOR_END: &str = "\x1b[0m";
lazy_static! { lazy_static! {
static ref PARENS: Vec<(char, char)> = vec![ static ref PARENS: Vec<(char, char)> = vec![('(', ')'), ('[', ']'), ('{', '}'), ('【', '】')];
('(', ')'), static ref ARTICLES: Vec<&'static str> = vec!["a", "an", "the"];
('[', ']'), static ref CHAFF: Vec<&'static str> = vec!["-", "_", ".", "", ""];
('{', '}'), static ref EXTENSIONS: Vec<&'static str> = vec!["mkv", "mp4", "webm"];
('【', '】'),
];
static ref ARTICLES: Vec<&'static str> = vec![
"a",
"an",
"the",
];
static ref CHAFF: Vec<&'static str> = vec![
"-",
"_",
".",
"",
"",
];
static ref EXTENSIONS: Vec<&'static str> = vec![
"mkv",
"mp4",
"webm",
];
} }
fn alternator_regex(choices: &Vec<&str>) -> String { #[derive(Parser, Debug)]
#[clap(author, version)]
/// Sort lines based on the "important" part of each line
///
/// sorta sorts the lines of stdin by stripping off prefixes such as
/// parenthesized descriptors, articles, etc., sorting based on the
/// stripped versions, and printing the original lines to stdout in
/// sorted order.
struct Cli {
/// Indent lines with spaces to align the important parts of each line
#[arg(short, long)]
align: bool,
/// Color the important part of each line
#[arg(short, long)]
color: bool,
}
fn alternator_regex(choices: &[&str]) -> String {
let mut result = String::new(); let mut result = String::new();
for (i, choice) in choices.iter().enumerate() { for (i, choice) in choices.iter().enumerate() {
if i > 0 { if i > 0 {
@ -44,6 +47,8 @@ fn main() -> io::Result<()> {
let article_regex_raw = format!(r"\b({})\b", alternator_regex(&ARTICLES)); let article_regex_raw = format!(r"\b({})\b", alternator_regex(&ARTICLES));
let extension_regex_raw = format!(r"\.({})", alternator_regex(&EXTENSIONS)); let extension_regex_raw = format!(r"\.({})", alternator_regex(&EXTENSIONS));
let cli = Cli::parse();
let mut space_paren_chaff_regex_raw = String::from(r"\s"); let mut space_paren_chaff_regex_raw = String::from(r"\s");
for (open, close) in PARENS.iter() { for (open, close) in PARENS.iter() {
let open = escape(&open.to_string()); let open = escape(&open.to_string());
@ -55,8 +60,14 @@ fn main() -> io::Result<()> {
space_paren_chaff_regex_raw.push_str(&escape(chaff)); space_paren_chaff_regex_raw.push_str(&escape(chaff));
} }
let strip_start_regex = Regex::new(&format!("^(?i)({article_regex_raw}|{space_paren_chaff_regex_raw})*")).unwrap(); let strip_start_regex = Regex::new(&format!(
let strip_end_regex = Regex::new(&format!("(?i)({space_paren_chaff_regex_raw})*({extension_regex_raw})?$")).unwrap(); "^(?i)({article_regex_raw}|{space_paren_chaff_regex_raw})*"
))
.unwrap();
let strip_end_regex = Regex::new(&format!(
"(?i)({space_paren_chaff_regex_raw})*({extension_regex_raw})?$"
))
.unwrap();
let mut results = Vec::new(); let mut results = Vec::new();
@ -64,8 +75,16 @@ fn main() -> io::Result<()> {
for line in stdin.lock().lines() { for line in stdin.lock().lines() {
let line = line?; let line = line?;
let strip_start_idx = strip_start_regex.find(&line).map(|m| m.end()).unwrap_or(0); let strip_start_idx = strip_start_regex.find(&line).map(|m| m.end()).unwrap_or(0);
let strip_end_idx = strip_end_regex.find_at(&line, strip_start_idx).map(|m| m.start()).unwrap_or(line.len()); let strip_end_idx = strip_end_regex
let item = ((&line[strip_start_idx..]).to_lowercase(), line, strip_start_idx, strip_end_idx); .find_at(&line, strip_start_idx)
.map(|m| m.start())
.unwrap_or(line.len());
let item = (
line[strip_start_idx..].to_lowercase(),
line,
strip_start_idx,
strip_end_idx,
);
let insertion_idx = match results.binary_search(&item) { let insertion_idx = match results.binary_search(&item) {
Ok(n) => n, Ok(n) => n,
Err(n) => n, Err(n) => n,
@ -73,9 +92,28 @@ fn main() -> io::Result<()> {
results.insert(insertion_idx, item); results.insert(insertion_idx, item);
} }
let alignment_width = if cli.align {
results
.iter()
.map(|(_, _, start_idx, _)| start_idx)
.max()
.copied()
.unwrap_or_default()
} else {
0
};
let mut stdout = io::stdout().lock(); let mut stdout = io::stdout().lock();
for (_, line, strip_start_idx, strip_end_idx) in results.into_iter() { for (_, line, strip_start_idx, strip_end_idx) in results.into_iter() {
writeln!(stdout, "{}\x1b[01;34m{}\x1b[0m{}", &line[..strip_start_idx], &line[strip_start_idx..strip_end_idx], &line[strip_end_idx..])?; writeln!(
stdout,
"{:>alignment_width$}{}{}{}{}",
&line[..strip_start_idx],
if cli.color { COLOR_START } else { "" },
&line[strip_start_idx..strip_end_idx],
if cli.color { COLOR_END } else { "" },
&line[strip_end_idx..],
)?;
} }
Ok(()) Ok(())
} }