108 lines
3.6 KiB
Rust
108 lines
3.6 KiB
Rust
use clap::{crate_authors, crate_version, App, Arg, ArgGroup};
|
|
use color_eyre::eyre::{Result, WrapErr};
|
|
use rayon::prelude::*;
|
|
use std::io::{self, stdin, stdout, BufRead, BufWriter, Write};
|
|
use strsim::{jaro_winkler, levenshtein};
|
|
|
|
// `#[global_allocator]` makes the static below the allocator used for every
// heap allocation in this program, replacing the default system allocator.
// NOTE(review): `bump_alloc::BumpAlloc` is presumably a bump allocator that
// does not reuse freed memory, which would trade peak memory for allocation
// speed in this short-lived CLI — confirm against the bump_alloc docs,
// including any upper bound on total allocation size.
#[global_allocator]
|
|
static A: bump_alloc::BumpAlloc = bump_alloc::BumpAlloc::new();
|
|
|
|
fn main() {
|
|
if let Err(err) = try_main() {
|
|
eprintln!("{:?}", err);
|
|
std::process::exit(1);
|
|
}
|
|
}
|
|
|
|
fn try_main() -> Result<()> {
|
|
color_eyre::install()?;
|
|
|
|
let matches = App::new("similar-sort")
|
|
.version(crate_version!())
|
|
.author(crate_authors!())
|
|
.about(
|
|
"works like `sort`, but sorts according to edit distance instead of alphanumerically.",
|
|
)
|
|
.long_about(
|
|
"works like `sort`, but sorts according to edit distance instead of alphanumerically.\n\nYou can choose the edit distance algorithm we use for this! If you don't know which one you need, Levenshtein is a good default. Try Jaro-Winkler if you care about your strings having similar prefixes (for example files in a project.)"
|
|
)
|
|
.arg(Arg::new("target").about("sort according to distance from this string"))
|
|
.arg(
|
|
Arg::new("levenshtein")
|
|
.long("levenshtein")
|
|
.about("sort according to Levenshtein distance (the default)"),
|
|
)
|
|
.arg(
|
|
Arg::new("jaro-winkler")
|
|
.long("jaro-winkler")
|
|
.about("sort according to Jaro-Winkler edit distance"),
|
|
)
|
|
.group(
|
|
ArgGroup::new("edit-method")
|
|
.arg("levenshtein")
|
|
.arg("jaro-winkler")
|
|
)
|
|
.arg(
|
|
Arg::new("stable-sort")
|
|
.long("stable-sort")
|
|
.about("use a stable sort")
|
|
.long_about("use a stable sort. This may affect performance. Measure if that matters for your use-case!")
|
|
)
|
|
.get_matches();
|
|
|
|
let target = matches.value_of("target").unwrap_or("");
|
|
|
|
let lines: Vec<String> = stdin()
|
|
.lock()
|
|
.lines()
|
|
.collect::<io::Result<Vec<String>>>()
|
|
.context("could not read lines from stdin")?;
|
|
|
|
let mut out = BufWriter::new(stdout());
|
|
|
|
if matches.is_present("jaro-winkler") {
|
|
let mut distances: Vec<(f64, &String)> = lines
|
|
.par_iter()
|
|
.map(|candidate| (jaro_winkler(target, candidate), candidate))
|
|
.collect();
|
|
|
|
if matches.is_present("stable-sort") {
|
|
distances.par_sort_by(|x, y| {
|
|
x.0.partial_cmp(&y.0)
|
|
.unwrap_or(std::cmp::Ordering::Equal)
|
|
.reverse()
|
|
});
|
|
} else {
|
|
distances.par_sort_unstable_by(|x, y| {
|
|
x.0.partial_cmp(&y.0)
|
|
.unwrap_or(std::cmp::Ordering::Equal)
|
|
.reverse()
|
|
});
|
|
}
|
|
|
|
for (_, candidate) in distances {
|
|
writeln!(out, "{}", candidate).context("could not write to stdout")?;
|
|
}
|
|
} else {
|
|
// levenshtein, the default
|
|
let mut distances: Vec<(usize, &String)> = lines
|
|
.par_iter()
|
|
.map(|candidate| (levenshtein(target, candidate), candidate))
|
|
.collect();
|
|
|
|
if matches.is_present("stable-sort") {
|
|
distances.par_sort_by_key(|x| x.0);
|
|
} else {
|
|
distances.par_sort_unstable_by_key(|x| x.0);
|
|
}
|
|
|
|
for (_, candidate) in distances {
|
|
writeln!(out, "{}", candidate).context("could not write to stdout")?;
|
|
}
|
|
};
|
|
|
|
out.flush().context("could not finish writing to stdout")?;
|
|
|
|
Ok(())
|
|
}
|