From 47eb454f0ce848c6d135b3109dcaa00bb0f87bd7 Mon Sep 17 00:00:00 2001 From: xenofem Date: Sun, 30 Jul 2023 00:50:53 -0400 Subject: [PATCH] 1.4.0-beta: add audio captures (audio metadata isn't quite working yet in eggbug-rs fork) --- Cargo.lock | 6 ++- Cargo.toml | 7 +-- README.md | 23 ++++++-- src/config.rs | 8 +++ src/main.rs | 107 +++++++++++++++++++++++++------------ src/{video.rs => media.rs} | 102 +++++++++++++++++++++++------------ src/shows/mod.rs | 48 ++++++++++++++--- 7 files changed, 219 insertions(+), 82 deletions(-) rename src/{video.rs => media.rs} (56%) diff --git a/Cargo.lock b/Cargo.lock index f4ec8be..0014348 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -270,7 +270,7 @@ checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" [[package]] name = "eggbug" version = "0.1.3" -source = "git+https://github.com/iliana/eggbug-rs.git?branch=main#94fc2f652a842b0fadfff62750562630e887672a" +source = "git+https://github.com/xenofem/eggbug-rs.git?branch=audio-attachments#6bcf51bceb3745f96ef0c9026d3143093fec032b" dependencies = [ "base64 0.13.1", "bytes", @@ -1219,7 +1219,7 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "screencap-bot" -version = "1.3.0" +version = "1.4.0-beta" dependencies = [ "anyhow", "dotenvy", @@ -1236,6 +1236,7 @@ dependencies = [ "serde_yaml", "tempfile", "tokio", + "tracing", ] [[package]] @@ -1582,6 +1583,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" dependencies = [ "cfg-if", + "log", "pin-project-lite", "tracing-attributes", "tracing-core", diff --git a/Cargo.toml b/Cargo.toml index a64add4..14f6b73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "screencap-bot" -version = "1.3.0" +version = "1.4.0-beta" edition = "2021" authors = ["xenofem "] license = "MIT" @@ -8,7 +8,7 @@ license = "MIT" [dependencies] anyhow = "1.0.71" dotenvy = 
"0.15.7" -eggbug = { git = "https://github.com/iliana/eggbug-rs.git", branch = "main" } +eggbug = { git = "https://github.com/xenofem/eggbug-rs.git", branch = "audio-attachments" } env_logger = "0.10" ffmpeg-next = "6.0.0" imagesize = "0.12.0" @@ -20,4 +20,5 @@ serde = "1" serde_with = "3" serde_yaml = "0.9.22" tempfile = "3.6.0" -tokio = { version = "1.28.2", features = ["full"] } \ No newline at end of file +tokio = { version = "1.28.2", features = ["full"] } +tracing = { version = "0.1", features = ["log"] } \ No newline at end of file diff --git a/README.md b/README.md index 81eb594..6270d36 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # screencap-bot this is a cohost bot that periodically posts randomly-chosen -screencaps from a configured collection of tv series/movies. +screencaps or audio clips from a configured collection of tv +series/movies/podcasts/etc. ## installation @@ -17,15 +18,19 @@ screencap-bot is configured with the following environment variables, which can also be put in a `.env` file in the program's working directory: -- `SCREENCAP_BOT_SHOWS_FILE`: path of a YAML file specifying what shows to take screencaps from (default: `./shows.yaml`) +- `SCREENCAP_BOT_CAPTURE_IMAGES`: whether to take screenshots (default: `true`) +- `SCREENCAP_BOT_CAPTURE_AUDIO_DURATION`: length of audio clips to capture, in seconds (default: unset, no audio capture) +- `SCREENCAP_BOT_SHOWS_FILE`: path of a YAML file specifying what shows to take captures from (default: `./shows.yaml`) - `SCREENCAP_BOT_GLOBAL_TAGS`: tags to put on every post the bot makes, as a comma-separated list (eg `bot account,automated post,The Cohost Bot Feed`) (default: none) - `SCREENCAP_BOT_POST_INTERVAL`: the interval between posts, in - seconds (default: 0, post a single screencap and then exit) + seconds (default: 0, post a single capture and then exit) - `SCREENCAP_BOT_COHOST_EMAIL`: the email address the bot should use to log into cohost - `SCREENCAP_BOT_COHOST_PASSWORD`: the 
password the bot should use to log into cohost - `SCREENCAP_BOT_COHOST_PAGE`: the cohost page the bot should post from - `SCREENCAP_BOT_COHOST_DRAFT`: whether to create cohost posts as drafts, eg for testing (default: `false`) +- `SCREENCAP_BOT_COHOST_CW`: whether to CW posts with the episode + number (default: `true` if taking screenshots, `false` if not) - `SCREENCAP_BOT_18PLUS`: whether posts should be flagged as containing 18+ content (default: `false`). this can be overridden for individual shows, see below. @@ -56,6 +61,11 @@ MS IGLOO: Gundam 0069: path: /home/user/media/Gundam 0069 18+: true +Friends at the Table: + path: /home/user/media/Friends at the Table + custom_episodes: + prefix: "Friends at the Table: " + regex: '^\d{4}-\d{2}-\d{2} - (?<episode>.*)\.mp3$' ``` each top-level key is a show title, which will be used in spoiler @@ -75,3 +85,10 @@ warnings on posts and in image alt text. each show has two keys: - `18+`: an optional setting for whether screencaps from this show should be flagged as containing 18+ content. if present, this takes precedence over the `SCREENCAP_BOT_18PLUS` environment variable. +- `custom_episodes`: Rather than letting the bot auto-detect episode + numbering, you can extract episode numbers from filenames using a regex. + + `regex`: should match a filename, and capture the episode number + or title in a capture group named `episode`. Files that don't + match the regex will be ignored. + + `prefix`: Will be prepended to whatever is captured by the + regex. 
(default: empty string) diff --git a/src/config.rs b/src/config.rs index 15cf027..6d461cb 100644 --- a/src/config.rs +++ b/src/config.rs @@ -7,6 +7,8 @@ use std::{ }; pub struct Config { + pub capture_images: bool, + pub capture_audio_duration: Option, pub shows_file: PathBuf, pub global_tags: Vec, pub post_interval: Duration, @@ -14,6 +16,7 @@ pub struct Config { pub cohost_password: String, pub cohost_page: String, pub cohost_draft: bool, + pub cohost_cw: bool, pub eighteen_plus: bool, } @@ -35,7 +38,11 @@ fn expect_var(name: &str) -> String { } pub fn load() -> Config { + let capture_images = parse_var("CAPTURE_IMAGES").unwrap_or(true); + Config { + capture_images, + capture_audio_duration: parse_var("CAPTURE_AUDIO_DURATION").ok(), shows_file: parse_var("SHOWS_FILE").unwrap_or(PathBuf::from("./shows.yaml")), global_tags: get_var("GLOBAL_TAGS") .map(|s| s.split(',').map(String::from).collect()) @@ -45,6 +52,7 @@ pub fn load() -> Config { cohost_password: expect_var("COHOST_PASSWORD"), cohost_page: expect_var("COHOST_PAGE"), cohost_draft: parse_var("COHOST_DRAFT").unwrap_or(false), + cohost_cw: parse_var("COHOST_CW").unwrap_or(capture_images), eighteen_plus: parse_var("18PLUS").unwrap_or(false), } } diff --git a/src/main.rs b/src/main.rs index 97f5133..1981bce 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,8 +12,8 @@ use rand::{ use shows::Shows; mod config; +mod media; mod shows; -mod video; lazy_static! { static ref RETRY_INTERVAL: Duration = Duration::from_secs(30); @@ -29,6 +29,12 @@ async fn main() -> anyhow::Result<()> { let conf = config::load(); + if let (false, None) = (conf.capture_images, conf.capture_audio_duration) { + return Err(anyhow!( + "At least one of image capture and audio capture must be enabled!" 
+ )); + } + info!("Loading shows from {}", conf.shows_file.display()); let shows = shows::load(&conf.shows_file).with_context(|| { format!( @@ -49,9 +55,9 @@ async fn main() -> anyhow::Result<()> { .context("Failed to login to cohost")?; loop { - let result = post_random_screencap(&conf, &shows, &session, &dist, &mut rng) + let result = post_random_capture(&conf, &shows, &session, &dist, &mut rng) .await - .context("Failed to post a random screencap"); + .context("Failed to post a random capture"); if conf.post_interval == Duration::ZERO { return result; @@ -68,7 +74,7 @@ async fn main() -> anyhow::Result<()> { } } -async fn post_random_screencap( +async fn post_random_capture( conf: &Config, shows: &Shows, session: &eggbug::Session, @@ -85,47 +91,73 @@ async fn post_random_screencap( })?; let (num, file) = episodes.iter().choose(rng).unwrap(); - let descriptor = shows::display_show_episode(show, *num); + let descriptor = shows::display_show_episode(show, num); info!("Selected: {} - {}", descriptor, file.display()); - let video_info = video::get_video_info(file, Some("eng")).with_context(|| { - format!( - "Failed to get duration and subtitle stream index for video file {}", - file.display() - ) - })?; + let media_info = media::get_media_info(file, Some("eng")) + .with_context(|| format!("Failed to get info for media file {}", file.display()))?; debug!( - "Video duration: {}", - format_timestamp(video_info.duration_secs, None) + "Media duration: {}", + format_timestamp(media_info.duration_secs, None) ); debug!( "Subtitle stream index: {:?}", - video_info.subtitle_stream_index + media_info.subtitle_stream_index ); - let timestamp = video_info.duration_secs * rng.sample::(Standard); - let formatted_timestamp = format_timestamp(timestamp, Some(video_info.duration_secs)); - info!("Taking screencap at {}", formatted_timestamp); + let max_timestamp = match conf.capture_audio_duration { + Some(d) => media_info.duration_secs - d, + None => media_info.duration_secs, + }; + 
let timestamp = max_timestamp * rng.sample::(Standard); + let formatted_timestamp = format_timestamp(timestamp, Some(media_info.duration_secs)); + info!("Taking capture at {}", formatted_timestamp); - let img_data = video::take_screencap(file, timestamp, video_info.subtitle_stream_index) - .await - .context("Failed to take screencap")?; + let mut attachments = Vec::new(); - let img_size = imagesize::blob_size(&img_data) - .context("Failed to get image size for screencap image data")?; + if conf.capture_images { + let image_data = media::take_screencap(file, timestamp, media_info.subtitle_stream_index) + .await + .context("Failed to take screencap")?; - let attachment = eggbug::Attachment::new( - img_data, - format!("{} @{}.png", descriptor, formatted_timestamp), - String::from("image/png"), - Some(img_size.width as u32), - Some(img_size.height as u32), - ) - .with_alt_text(format!( - "Screencap of {} at {}", - descriptor, formatted_timestamp - )); + let image_size = imagesize::blob_size(&image_data) + .context("Failed to get image size for screencap image data")?; + + let image_attachment = eggbug::Attachment::new( + image_data, + format!("{} @{}.png", descriptor, formatted_timestamp), + String::from("image/png"), + eggbug::MediaMetadata::Image { + width: Some(image_size.width as u32), + height: Some(image_size.height as u32), + }, + ) + .with_alt_text(format!( + "Screencap of {} at {}", + descriptor, formatted_timestamp + )); + + attachments.push(image_attachment); + } + + if let Some(duration) = conf.capture_audio_duration { + let audio_data = media::take_audio_clip(file, timestamp, duration) + .await + .context("Failed to take audio clip")?; + + let audio_attachment = eggbug::Attachment::new( + audio_data, + format!("{} @{}.mp3", descriptor, formatted_timestamp), + String::from("audio/mpeg"), + eggbug::MediaMetadata::Audio { + artist: show.title.clone(), + title: descriptor.clone(), + }, + ); + + attachments.push(audio_attachment); + } let mut tags = 
show.tags.clone(); tags.extend_from_slice(&conf.global_tags); @@ -134,14 +166,19 @@ async fn post_random_screencap( .create_post( &conf.cohost_page, &mut eggbug::Post { - content_warnings: vec![descriptor], - attachments: vec![attachment], + content_warnings: if conf.cohost_cw { + vec![descriptor] + } else { + vec![] + }, + attachments, tags, draft: conf.cohost_draft, adult_content: show.eighteen_plus.unwrap_or(conf.eighteen_plus), headline: String::new(), markdown: String::new(), metadata: None, + ask: None, }, ) .await diff --git a/src/video.rs b/src/media.rs similarity index 56% rename from src/video.rs rename to src/media.rs index 724d75d..fcd3e63 100644 --- a/src/video.rs +++ b/src/media.rs @@ -4,6 +4,7 @@ use ffmpeg_next::{ media::Type, }; use lazy_static::lazy_static; +use log::debug; use regex::Regex; use tempfile::tempdir; use tokio::{fs, process::Command}; @@ -14,19 +15,20 @@ lazy_static! { static ref SUBTITLE_FORBID_REGEX: Regex = Regex::new("(?i)sign|song").unwrap(); } -pub struct VideoInfo { +pub struct MediaInfo { pub duration_secs: f64, // The index among the subtitle streams, not among the streams in general pub subtitle_stream_index: Option, } -pub fn get_video_info>( +pub fn get_media_info>( source: &P, subtitle_lang: Option<&str>, -) -> anyhow::Result { - let ctx = input(source).context("Failed to load video file")?; +) -> anyhow::Result { + let ctx = input(source).context("Failed to load media file")?; let duration_secs = ctx.duration() as f64 / f64::from(ffmpeg_next::ffi::AV_TIME_BASE); + debug!("{:?}", ctx.metadata()); let subtitle_stream_index = subtitle_lang.and_then(|lang| { ctx.streams() @@ -54,53 +56,40 @@ pub fn get_video_info>( .map(|(idx, _)| idx) }); - Ok(VideoInfo { + Ok(MediaInfo { duration_secs, subtitle_stream_index, }) } -pub async fn take_screencap>( +async fn take_ffmpeg_capture( source: &P, - timestamp_secs: f64, - subtitle_stream_index: Option, -) -> anyhow::Result> { - let ext = source.as_ref().extension().and_then(|s| 
s.to_str()); - if ext != Some("mkv") && ext != Some("mp4") { + output_ext: &str, + apply_args: F, +) -> anyhow::Result> +where + P: AsRef, + F: FnOnce(&mut Command, &Path, &Path), +{ + let input_ext = source.as_ref().extension().and_then(|s| s.to_str()); + if input_ext.map(|e| e.chars().all(|c| c.is_ascii_alphanumeric())) != Some(true) { return Err(anyhow!( - "Video file {} had unexpected file extension", + "Media file {} had unexpected file extension", source.as_ref().display() )); } let tmp_dir = tempdir() .context("Failed to create temporary directory for ffmpeg input and output files")?; - let link_path = tmp_dir.path().join(format!("in.{}", ext.unwrap())); + let link_path = tmp_dir.path().join(format!("in.{}", input_ext.unwrap())); fs::symlink(source, &link_path) .await - .context("Failed to create symlink for video file")?; - let dest_path = tmp_dir.path().join("out.png"); + .context("Failed to create symlink for input file")?; + let dest_path = tmp_dir.path().join(format!("out.{}", output_ext)); let mut cmd = Command::new("ffmpeg"); - cmd.arg("-ss") - .arg(format!("{:.2}", timestamp_secs)) - .arg("-copyts") - .arg("-i") - .arg(&link_path); - - if let Some(idx) = subtitle_stream_index { - cmd.arg("-filter_complex").arg(format!( - "[0:v]subtitles={}:si={}", - link_path.to_string_lossy(), - idx - )); - } - - cmd.args(["-vframes", "1"]) - .args(["-loglevel", "quiet"]) - .arg("-y") - .arg(&dest_path); + apply_args(&mut cmd, &link_path, &dest_path); let status = cmd .status() @@ -117,3 +106,50 @@ pub async fn take_screencap>( .await .with_context(|| format!("Failed to read ffmpeg output file {}", dest_path.display())) } + +pub async fn take_screencap>( + source: &P, + timestamp_secs: f64, + subtitle_stream_index: Option, +) -> anyhow::Result> { + take_ffmpeg_capture(source, "png", |cmd, in_path, out_path| { + cmd.arg("-ss") + .arg(format!("{:.2}", timestamp_secs)) + .arg("-copyts") + .arg("-i") + .arg(in_path); + + if let Some(idx) = subtitle_stream_index { + 
cmd.arg("-filter_complex").arg(format!( + "[0:v]subtitles={}:si={}", + in_path.to_string_lossy(), + idx + )); + } + + cmd.args(["-vframes", "1"]) + .args(["-loglevel", "quiet"]) + .arg("-y") + .arg(out_path); + }) + .await +} + +pub async fn take_audio_clip>( + source: &P, + timestamp_secs: f64, + duration_secs: f64, +) -> anyhow::Result> { + take_ffmpeg_capture(source, "mp3", |cmd, in_path, out_path| { + cmd.arg("-ss") + .arg(format!("{:.2}", timestamp_secs)) + .arg("-t") + .arg(format!("{:.2}", duration_secs)) + .arg("-i") + .arg(in_path) + .args(["-loglevel", "quiet"]) + .arg("-y") + .arg(out_path); + }) + .await +} diff --git a/src/shows/mod.rs b/src/shows/mod.rs index f9644a6..27776fe 100644 --- a/src/shows/mod.rs +++ b/src/shows/mod.rs @@ -6,8 +6,9 @@ use std::{ use anyhow::{anyhow, Context}; use log::{debug, error}; +use regex::Regex; use serde::Deserialize; -use serde_with::{serde_as, KeyValueMap}; +use serde_with::{serde_as, DisplayFromStr, KeyValueMap}; mod enumeration; @@ -17,6 +18,8 @@ pub struct Show { pub title: String, pub path: PathBuf, #[serde(default)] + pub custom_episodes: Option, + #[serde(default)] pub tags: Vec, #[serde(default)] pub parts: HashMap, @@ -26,6 +29,15 @@ pub struct Show { pub eighteen_plus: Option, } +#[serde_as] +#[derive(Deserialize)] +pub struct CustomEpisodes { + #[serde(default)] + pub prefix: String, + #[serde_as(as = "DisplayFromStr")] + pub regex: Regex, +} + fn default_weight() -> f32 { 1.0 } @@ -36,11 +48,12 @@ pub type Shows = Vec; #[derive(Deserialize)] struct ShowsWrapper(#[serde_as(as = "KeyValueMap<_>")] Shows); -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum EpisodeNumber { Standalone, SingleSeason(u32), MultiSeason(u32, u32), + Custom(String), } type Episodes = HashMap; @@ -53,7 +66,7 @@ pub fn load>(shows_file: P) -> anyhow::Result { .0) } -pub fn display_show_episode(show: &Show, episode: EpisodeNumber) -> String { +pub fn display_show_episode(show: 
&Show, episode: &EpisodeNumber) -> String { match episode { EpisodeNumber::Standalone => show.title.to_string(), EpisodeNumber::SingleSeason(n) => format!("{} episode {}", show.title, n), @@ -61,10 +74,17 @@ pub fn display_show_episode(show: &Show, episode: EpisodeNumber) -> String { "{} {} episode {}", show.title, show.parts - .get(&season) + .get(season) .unwrap_or(&format!("season {}", season)), ep ), + EpisodeNumber::Custom(s) => { + show.custom_episodes + .as_ref() + .map(|c| c.prefix.clone()) + .unwrap_or_default() + + s + } } } @@ -110,8 +130,24 @@ impl Show { }) .filter_map(|r| r.transpose()) .collect::>>()?; - enumeration::enumerate_episodes(files) - .ok_or(anyhow!("Could not detect any episode numbering scheme")) + if let Some(CustomEpisodes { ref regex, .. }) = self.custom_episodes { + files + .into_iter() + .filter_map(|f| -> Option> { + let episode_name = regex + .captures(f.file_name().unwrap().to_str().unwrap())? + .name("episode") + .map(|m| m.as_str().to_string()) + .ok_or(anyhow!( + "Failed to find capture group `episode` in episode regex" + )); + Some(episode_name.map(|n| (EpisodeNumber::Custom(n), f))) + }) + .collect::>() + } else { + enumeration::enumerate_episodes(files) + .ok_or(anyhow!("Could not detect any episode numbering scheme")) + } } else { Err(anyhow!("The show's path is not a file or a directory")) }