screencap-bot/src/shows/enumeration.rs

348 lines
17 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use std::{
collections::{BTreeSet, HashMap},
path::PathBuf,
};
use lazy_static::lazy_static;
use log::{debug, trace, warn};
use regex::Regex;
use super::{EpisodeNumber, Episodes};
// Matches each maximal run of ASCII digits. Every match is treated as a
// candidate episode or season number by `prefix_enumerations`. The
// pattern is a constant, so the `unwrap` can only fail on a programming
// error; the regex is compiled lazily on first use.
lazy_static! {
static ref NUMBER_REGEX: Regex = Regex::new("[0-9]+").unwrap();
}
// An association of numbers to items, sorted by number (and then by
// item, since the pairs are ordered as tuples).
//
// We use a BTreeSet of (number, item) pairs rather than a BTreeMap so
// that several different items sharing the same number are all
// preserved. Only an exact repeat of the same pair collapses.
type Enumeration<T> = BTreeSet<(u32, T)>;
// Count how many neighbouring entries (in the set's sorted order) have
// numbers that differ by exactly one. Entries that share a number, or
// that are separated by a gap, contribute nothing. An enumeration with
// fewer than two entries scores zero.
fn consecutiveness<T>(enumeration: &Enumeration<T>) -> usize {
    let mut steps = 0;
    let mut previous: Option<u32> = None;
    for (number, _) in enumeration {
        if let Some(prev) = previous {
            // The set is ordered by number first, so `number >= prev`
            // and the subtraction cannot underflow.
            if *number - prev == 1 {
                steps += 1;
            }
        }
        previous = Some(*number);
    }
    steps
}
// Report whether every neighbouring pair of entries differs by exactly
// one, i.e. the numbers form a single unbroken run with no gaps and no
// repeated numbers.
//
// A single-entry enumeration counts as fully consecutive; an empty one
// does not.
fn fully_consecutive<T>(enumeration: &Enumeration<T>) -> bool {
    // Written as `steps + 1 == len` rather than `steps == len - 1`:
    // the latter underflows `usize` for an empty set, which panics in
    // debug builds.
    consecutiveness(enumeration) + 1 == enumeration.len()
}
// Group items by the text that precedes each number in their string
// representation.
//
// `f` maps an item to its string form. Every run of digits found in
// that string yields one record: the key is the part of the string
// before the digits, and the value gains a (number, item) pair. An item
// containing several numbers therefore appears under several prefixes.
// Digit runs that do not fit in a `u32` are logged and skipped.
fn prefix_enumerations<'a, T: Copy + Ord, I: Iterator<Item = T>, F: Fn(T) -> &'a str>(
    items: I,
    f: F,
) -> HashMap<&'a str, Enumeration<T>> {
    let mut by_prefix: HashMap<&str, Enumeration<T>> = HashMap::new();
    for item in items {
        let name = f(item);
        // Lazily pair each digit run's start offset with its parsed
        // value, dropping (and reporting) runs that fail to parse.
        let candidates = NUMBER_REGEX.find_iter(name).filter_map(|found| {
            match found.as_str().parse::<u32>() {
                Ok(number) => Some((found.start(), number)),
                Err(e) => {
                    warn!("Failed to parse candidate number: {}", e);
                    None
                }
            }
        });
        for (start, number) in candidates {
            // `start` is a regex match offset, so it is always a valid
            // char boundary within `name`.
            let prefix = name.get(..start).unwrap();
            trace!("{}: candidate prefix {}, number {}", name, prefix, number);
            by_prefix.entry(prefix).or_default().insert((number, item));
        }
    }
    by_prefix
}
pub fn enumerate_episodes(files: Vec<PathBuf>) -> Option<Episodes> {
let mut episode_enumerations = prefix_enumerations(files.iter(), |path| {
path.file_name().unwrap().to_str().unwrap()
});
// Retain only prefix enumerations that produce consecutive numbers
episode_enumerations.retain(|_, en| consecutiveness(en) > 0);
let mut result: Episodes = HashMap::new();
if episode_enumerations.len() > 1 {
// There are multiple viable episode enumerations, which may
// mean that this list of files includes multiple seasons of a
// series. Look for a fully consecutive prefix enumeration *of
// the candidate episode prefixes* to find season numbers.
let season_enumerations = prefix_enumerations(episode_enumerations.keys(), |s| s);
let best_season_enumeration = season_enumerations
.into_values()
.max_by_key(consecutiveness);
if let Some(season_enumeration) = best_season_enumeration {
if season_enumeration.len() > 1 && fully_consecutive(&season_enumeration) {
for (season_num, prefix) in season_enumeration.into_iter() {
for (episode_num, path) in episode_enumerations.get(prefix).unwrap().iter() {
debug!(
"Season {} episode {}: {}",
season_num,
episode_num,
path.display()
);
if let Some(dup) = result.insert(
EpisodeNumber::MultiSeason(season_num, *episode_num),
path.to_path_buf(),
) {
warn!(
"Duplicate episode number, discarding file {}",
dup.display()
);
}
}
}
return Some(result);
}
}
}
// No evidence found for multiple seasons, so just take the best episode enumeration
let best_episode_enumeration = episode_enumerations
.into_values()
.max_by_key(consecutiveness)?;
for (num, path) in best_episode_enumeration.into_iter() {
debug!("Episode {}: {}", num, path.display());
if let Some(dup) = result.insert(EpisodeNumber::SingleSeason(num), path.to_path_buf()) {
warn!(
"Duplicate episode number, discarding file {}",
dup.display()
);
}
}
Some(result)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Turn a list of path strings into the owned paths that
    // `enumerate_episodes` consumes.
    fn to_paths(names: &[&str]) -> Vec<PathBuf> {
        names.iter().map(|name| PathBuf::from(*name)).collect()
    }

    // Build the expected result for a single-season show from
    // (episode, path) pairs.
    fn single_season(entries: &[(u32, &str)]) -> Option<HashMap<EpisodeNumber, PathBuf>> {
        Some(
            entries
                .iter()
                .map(|&(episode, name)| (EpisodeNumber::SingleSeason(episode), PathBuf::from(name)))
                .collect(),
        )
    }

    // Build the expected result for a multi-season show from
    // (season, episode, path) triples.
    fn multi_season(entries: &[(u32, u32, &str)]) -> Option<HashMap<EpisodeNumber, PathBuf>> {
        Some(
            entries
                .iter()
                .map(|&(season, episode, name)| {
                    (
                        EpisodeNumber::MultiSeason(season, episode),
                        PathBuf::from(name),
                    )
                })
                .collect(),
        )
    }

    // Twelve episodes of one season, listed out of order, some with a
    // "v2" suffix directly after the episode number.
    #[test]
    fn test_enumerate_single_season() {
        let files = to_paths(&[
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 07v2 (1080p) [BBF1FDA4].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 08 (1080p) [95D16D74].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 04v2 (1080p) [D291A3B0].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 11v2 (1080p) [B7B3ECB6].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 05 (1080p) [16CC6267].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 02v2 (1080p) [5E88C757].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 03v2 (1080p) [5F5AD4BD].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 01v2 (1080p) [8C038972].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 12 (1080p) [179132FA].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 06v2 (1080p) [21EA6641].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 09 (1080p) [5A9C6CEC].mkv",
            "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 10 (1080p) [97953FA7].mkv",
        ]);
        let expected = single_season(&[
            (1, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 01v2 (1080p) [8C038972].mkv"),
            (2, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 02v2 (1080p) [5E88C757].mkv"),
            (3, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 03v2 (1080p) [5F5AD4BD].mkv"),
            (4, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 04v2 (1080p) [D291A3B0].mkv"),
            (5, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 05 (1080p) [16CC6267].mkv"),
            (6, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 06v2 (1080p) [21EA6641].mkv"),
            (7, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 07v2 (1080p) [BBF1FDA4].mkv"),
            (8, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 08 (1080p) [95D16D74].mkv"),
            (9, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 09 (1080p) [5A9C6CEC].mkv"),
            (10, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 10 (1080p) [97953FA7].mkv"),
            (11, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 11v2 (1080p) [B7B3ECB6].mkv"),
            (12, "media/Chainsaw Man/[SubsPlease] Chainsaw Man - 12 (1080p) [179132FA].mkv"),
        ]);
        assert_eq!(enumerate_episodes(files), expected);
    }

    // Two seasons of a show whose title itself contains a number
    // ("00"), which must not be mistaken for a season or episode.
    #[test]
    fn test_enumerate_two_seasons_with_number_in_title() {
        let files = to_paths(&[
            "media/MSG 00/MS Gundam 00 - S02 E04 - A Reason to Fight (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S02 E03 - Allelujah Rescue Operation (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S02 E05 - Homeland Burning (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S01 E02 - Gundam Meisters (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S02 E02 - Twin Drive (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S01 E01 - Celestial Being (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S02 E06 - Scars (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S01 E04 - International Negotiation (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S01 E05 - Escape Limit Zone (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S01 E03 - The Changing World (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S02 E01 - The Angels' Second Advent (720p - DUAL Audio).mkv",
            "media/MSG 00/MS Gundam 00 - S01 E06 - Seven Swords (720p - DUAL Audio).mkv",
        ]);
        let expected = multi_season(&[
            (1, 1, "media/MSG 00/MS Gundam 00 - S01 E01 - Celestial Being (720p - DUAL Audio).mkv"),
            (1, 2, "media/MSG 00/MS Gundam 00 - S01 E02 - Gundam Meisters (720p - DUAL Audio).mkv"),
            (1, 3, "media/MSG 00/MS Gundam 00 - S01 E03 - The Changing World (720p - DUAL Audio).mkv"),
            (1, 4, "media/MSG 00/MS Gundam 00 - S01 E04 - International Negotiation (720p - DUAL Audio).mkv"),
            (1, 5, "media/MSG 00/MS Gundam 00 - S01 E05 - Escape Limit Zone (720p - DUAL Audio).mkv"),
            (1, 6, "media/MSG 00/MS Gundam 00 - S01 E06 - Seven Swords (720p - DUAL Audio).mkv"),
            (2, 1, "media/MSG 00/MS Gundam 00 - S02 E01 - The Angels' Second Advent (720p - DUAL Audio).mkv"),
            (2, 2, "media/MSG 00/MS Gundam 00 - S02 E02 - Twin Drive (720p - DUAL Audio).mkv"),
            (2, 3, "media/MSG 00/MS Gundam 00 - S02 E03 - Allelujah Rescue Operation (720p - DUAL Audio).mkv"),
            (2, 4, "media/MSG 00/MS Gundam 00 - S02 E04 - A Reason to Fight (720p - DUAL Audio).mkv"),
            (2, 5, "media/MSG 00/MS Gundam 00 - S02 E05 - Homeland Burning (720p - DUAL Audio).mkv"),
            (2, 6, "media/MSG 00/MS Gundam 00 - S02 E06 - Scars (720p - DUAL Audio).mkv"),
        ]);
        assert_eq!(enumerate_episodes(files), expected);
    }

    // File names containing multi-byte characters before and after the
    // episode number; prefixes must be sliced on valid boundaries.
    #[test]
    fn test_enumerate_non_ascii() {
        let files = to_paths(&[
            "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 03 機動戦士ガンダム Twilight AXIS 第3話 [720p].mkv",
            "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 06 機動戦士ガンダム Twilight AXIS 第6話 [720p].mkv",
            "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 02 機動戦士ガンダム Twilight AXIS 第2話 [720p].mkv",
            "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 04 機動戦士ガンダム Twilight AXIS 第4話 [720p].mkv",
            "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 01 機動戦士ガンダム Twilight AXIS 第1話 [720p].mkv",
            "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 05 機動戦士ガンダム Twilight AXIS 第5話 [720p].mkv",
        ]);
        let expected = single_season(&[
            (1, "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 01 機動戦士ガンダム Twilight AXIS 第1話 [720p].mkv"),
            (2, "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 02 機動戦士ガンダム Twilight AXIS 第2話 [720p].mkv"),
            (3, "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 03 機動戦士ガンダム Twilight AXIS 第3話 [720p].mkv"),
            (4, "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 04 機動戦士ガンダム Twilight AXIS 第4話 [720p].mkv"),
            (5, "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 05 機動戦士ガンダム Twilight AXIS 第5話 [720p].mkv"),
            (6, "media/Twilight AXIS/[(́◉◞౪◟◉‵)] Mobile Suit Gundam Twilight AXIS - 06 機動戦士ガンダム Twilight AXIS 第6話 [720p].mkv"),
        ]);
        assert_eq!(enumerate_episodes(files), expected);
    }

    // A directory that also holds numbered openings/endings and
    // unnumbered extras; only the main episode run should be returned.
    #[test]
    fn test_enumerate_extraneous_numbered_items() {
        let files = to_paths(&[
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_03_Resub(720p)[0A99BA5D].mkv",
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_OP1_Resub(720p)[A5ADDABF].mkv",
            "media/ZZ Gundam/[EG]Gundam_Frag_(1080p)[546747A1].mkv",
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_06_Resub(720p)[CAADADF2].mkv",
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_ED2_Resub(720p)[21717EAF].mkv",
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_02_Resub(720p)[115ABF72].mkv",
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_05_Resub(720p)[A0FD098A].mkv",
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_01_Resub(720p)[7CDE63CD].mkv",
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_ED1_Resub(720p)[CA9B2E3F].mkv",
            "media/ZZ Gundam/[EG]Gundam_Frag_II(1080p)[49465CA8].mkv",
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_04_Resub(720p)[8D219C3C].mkv",
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_OP2_Resub(720p)[F691D6D5].mkv",
            "media/ZZ Gundam/[EG]ZZ_Gundam_BD_OP3_Resub(720p)[BD7DCCE6].mkv",
        ]);
        let expected = single_season(&[
            (1, "media/ZZ Gundam/[EG]ZZ_Gundam_BD_01_Resub(720p)[7CDE63CD].mkv"),
            (2, "media/ZZ Gundam/[EG]ZZ_Gundam_BD_02_Resub(720p)[115ABF72].mkv"),
            (3, "media/ZZ Gundam/[EG]ZZ_Gundam_BD_03_Resub(720p)[0A99BA5D].mkv"),
            (4, "media/ZZ Gundam/[EG]ZZ_Gundam_BD_04_Resub(720p)[8D219C3C].mkv"),
            (5, "media/ZZ Gundam/[EG]ZZ_Gundam_BD_05_Resub(720p)[A0FD098A].mkv"),
            (6, "media/ZZ Gundam/[EG]ZZ_Gundam_BD_06_Resub(720p)[CAADADF2].mkv"),
        ]);
        assert_eq!(enumerate_episodes(files), expected);
    }
}