1.4.0-beta: add audio captures (audio metadata isn't quite working yet in eggbug-rs fork)

This commit is contained in:
xenofem 2023-07-30 00:50:53 -04:00
parent 4ec1794403
commit 47eb454f0c
7 changed files with 219 additions and 82 deletions

6
Cargo.lock generated
View file

@ -270,7 +270,7 @@ checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
[[package]]
name = "eggbug"
version = "0.1.3"
source = "git+https://github.com/iliana/eggbug-rs.git?branch=main#94fc2f652a842b0fadfff62750562630e887672a"
source = "git+https://github.com/xenofem/eggbug-rs.git?branch=audio-attachments#6bcf51bceb3745f96ef0c9026d3143093fec032b"
dependencies = [
"base64 0.13.1",
"bytes",
@ -1219,7 +1219,7 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "screencap-bot"
version = "1.3.0"
version = "1.4.0-beta"
dependencies = [
"anyhow",
"dotenvy",
@ -1236,6 +1236,7 @@ dependencies = [
"serde_yaml",
"tempfile",
"tokio",
"tracing",
]
[[package]]
@ -1582,6 +1583,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
dependencies = [
"cfg-if",
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",

View file

@ -1,6 +1,6 @@
[package]
name = "screencap-bot"
version = "1.3.0"
version = "1.4.0-beta"
edition = "2021"
authors = ["xenofem <xenofem@xeno.science>"]
license = "MIT"
@ -8,7 +8,7 @@ license = "MIT"
[dependencies]
anyhow = "1.0.71"
dotenvy = "0.15.7"
eggbug = { git = "https://github.com/iliana/eggbug-rs.git", branch = "main" }
eggbug = { git = "https://github.com/xenofem/eggbug-rs.git", branch = "audio-attachments" }
env_logger = "0.10"
ffmpeg-next = "6.0.0"
imagesize = "0.12.0"
@ -20,4 +20,5 @@ serde = "1"
serde_with = "3"
serde_yaml = "0.9.22"
tempfile = "3.6.0"
tokio = { version = "1.28.2", features = ["full"] }
tokio = { version = "1.28.2", features = ["full"] }
tracing = { version = "0.1", features = ["log"] }

View file

@ -1,7 +1,8 @@
# screencap-bot
this is a cohost bot that periodically posts randomly-chosen
screencaps from a configured collection of tv series/movies.
screencaps or audio clips from a configured collection of tv
series/movies/podcasts/etc.
## installation
@ -17,15 +18,19 @@ screencap-bot is configured with the following environment variables,
which can also be put in a `.env` file in the program's working
directory:
- `SCREENCAP_BOT_SHOWS_FILE`: path of a YAML file specifying what shows to take screencaps from (default: `./shows.yaml`)
- `SCREENCAP_BOT_CAPTURE_IMAGES`: whether to take screenshots (default: `true`)
- `SCREENCAP_BOT_CAPTURE_AUDIO_DURATION`: length of audio clips to capture, in seconds (default: unset, no audio capture)
- `SCREENCAP_BOT_SHOWS_FILE`: path of a YAML file specifying what shows to take captures from (default: `./shows.yaml`)
- `SCREENCAP_BOT_GLOBAL_TAGS`: tags to put on every post the bot makes, as a comma-separated list (eg `bot account,automated post,The Cohost Bot Feed`) (default: none)
- `SCREENCAP_BOT_POST_INTERVAL`: the interval between posts, in
seconds (default: 0, post a single screencap and then exit)
seconds (default: 0, post a single capture and then exit)
- `SCREENCAP_BOT_COHOST_EMAIL`: the email address the bot should use to log into cohost
- `SCREENCAP_BOT_COHOST_PASSWORD`: the password the bot should use to log into cohost
- `SCREENCAP_BOT_COHOST_PAGE`: the cohost page the bot should post from
- `SCREENCAP_BOT_COHOST_DRAFT`: whether to create cohost posts as
drafts, eg for testing (default: `false`)
- `SCREENCAP_BOT_COHOST_CW`: whether to CW posts with the episode
number (default: `true` if taking screenshots, `false` if not)
- `SCREENCAP_BOT_18PLUS`: whether posts should be flagged as
containing 18+ content (default: `false`). this can be overridden
for individual shows, see below.
@ -56,6 +61,11 @@ MS IGLOO:
Gundam 0069:
path: /home/user/media/Gundam 0069
18+: true
Friends at the Table:
path: /home/user/media/Friends at the Table
custom_episodes:
prefix: "Friends at the Table: "
regex: '^\d{4}-\d{2}-\d{2} - (?<episode>.*)\.mp3$'
```
each top-level key is a show title, which will be used in spoiler
@ -75,3 +85,10 @@ warnings on posts and in image alt text. each show has two keys:
- `18+`: an optional setting for whether captures from this show
should be flagged as containing 18+ content. if present, this takes
precedence over the `SCREENCAP_BOT_18PLUS` environment variable.
- `custom_episodes`: an optional setting. rather than letting the bot
auto-detect episode numbering, you can extract episode numbers or
titles from filenames using a regex. it has two sub-keys:
+ `regex`: should match a filename, and capture the episode number
or title in a capture group named `episode`. Files that don't
match the regex will be ignored.
+ `prefix`: Will be prepended to whatever is captured by the
regex. (default: empty string)

View file

@ -7,6 +7,8 @@ use std::{
};
pub struct Config {
pub capture_images: bool,
pub capture_audio_duration: Option<f64>,
pub shows_file: PathBuf,
pub global_tags: Vec<String>,
pub post_interval: Duration,
@ -14,6 +16,7 @@ pub struct Config {
pub cohost_password: String,
pub cohost_page: String,
pub cohost_draft: bool,
pub cohost_cw: bool,
pub eighteen_plus: bool,
}
@ -35,7 +38,11 @@ fn expect_var(name: &str) -> String {
}
pub fn load() -> Config {
let capture_images = parse_var("CAPTURE_IMAGES").unwrap_or(true);
Config {
capture_images,
capture_audio_duration: parse_var("CAPTURE_AUDIO_DURATION").ok(),
shows_file: parse_var("SHOWS_FILE").unwrap_or(PathBuf::from("./shows.yaml")),
global_tags: get_var("GLOBAL_TAGS")
.map(|s| s.split(',').map(String::from).collect())
@ -45,6 +52,7 @@ pub fn load() -> Config {
cohost_password: expect_var("COHOST_PASSWORD"),
cohost_page: expect_var("COHOST_PAGE"),
cohost_draft: parse_var("COHOST_DRAFT").unwrap_or(false),
cohost_cw: parse_var("COHOST_CW").unwrap_or(capture_images),
eighteen_plus: parse_var("18PLUS").unwrap_or(false),
}
}

View file

@ -12,8 +12,8 @@ use rand::{
use shows::Shows;
mod config;
mod media;
mod shows;
mod video;
lazy_static! {
static ref RETRY_INTERVAL: Duration = Duration::from_secs(30);
@ -29,6 +29,12 @@ async fn main() -> anyhow::Result<()> {
let conf = config::load();
if let (false, None) = (conf.capture_images, conf.capture_audio_duration) {
return Err(anyhow!(
"At least one of image capture and audio capture must be enabled!"
));
}
info!("Loading shows from {}", conf.shows_file.display());
let shows = shows::load(&conf.shows_file).with_context(|| {
format!(
@ -49,9 +55,9 @@ async fn main() -> anyhow::Result<()> {
.context("Failed to login to cohost")?;
loop {
let result = post_random_screencap(&conf, &shows, &session, &dist, &mut rng)
let result = post_random_capture(&conf, &shows, &session, &dist, &mut rng)
.await
.context("Failed to post a random screencap");
.context("Failed to post a random capture");
if conf.post_interval == Duration::ZERO {
return result;
@ -68,7 +74,7 @@ async fn main() -> anyhow::Result<()> {
}
}
async fn post_random_screencap<R: Rng>(
async fn post_random_capture<R: Rng>(
conf: &Config,
shows: &Shows,
session: &eggbug::Session,
@ -85,47 +91,73 @@ async fn post_random_screencap<R: Rng>(
})?;
let (num, file) = episodes.iter().choose(rng).unwrap();
let descriptor = shows::display_show_episode(show, *num);
let descriptor = shows::display_show_episode(show, num);
info!("Selected: {} - {}", descriptor, file.display());
let video_info = video::get_video_info(file, Some("eng")).with_context(|| {
format!(
"Failed to get duration and subtitle stream index for video file {}",
file.display()
)
})?;
let media_info = media::get_media_info(file, Some("eng"))
.with_context(|| format!("Failed to get info for media file {}", file.display()))?;
debug!(
"Video duration: {}",
format_timestamp(video_info.duration_secs, None)
"Media duration: {}",
format_timestamp(media_info.duration_secs, None)
);
debug!(
"Subtitle stream index: {:?}",
video_info.subtitle_stream_index
media_info.subtitle_stream_index
);
let timestamp = video_info.duration_secs * rng.sample::<f64, _>(Standard);
let formatted_timestamp = format_timestamp(timestamp, Some(video_info.duration_secs));
info!("Taking screencap at {}", formatted_timestamp);
let max_timestamp = match conf.capture_audio_duration {
Some(d) => media_info.duration_secs - d,
None => media_info.duration_secs,
};
let timestamp = max_timestamp * rng.sample::<f64, _>(Standard);
let formatted_timestamp = format_timestamp(timestamp, Some(media_info.duration_secs));
info!("Taking capture at {}", formatted_timestamp);
let img_data = video::take_screencap(file, timestamp, video_info.subtitle_stream_index)
.await
.context("Failed to take screencap")?;
let mut attachments = Vec::new();
let img_size = imagesize::blob_size(&img_data)
.context("Failed to get image size for screencap image data")?;
if conf.capture_images {
let image_data = media::take_screencap(file, timestamp, media_info.subtitle_stream_index)
.await
.context("Failed to take screencap")?;
let attachment = eggbug::Attachment::new(
img_data,
format!("{} @{}.png", descriptor, formatted_timestamp),
String::from("image/png"),
Some(img_size.width as u32),
Some(img_size.height as u32),
)
.with_alt_text(format!(
"Screencap of {} at {}",
descriptor, formatted_timestamp
));
let image_size = imagesize::blob_size(&image_data)
.context("Failed to get image size for screencap image data")?;
let image_attachment = eggbug::Attachment::new(
image_data,
format!("{} @{}.png", descriptor, formatted_timestamp),
String::from("image/png"),
eggbug::MediaMetadata::Image {
width: Some(image_size.width as u32),
height: Some(image_size.height as u32),
},
)
.with_alt_text(format!(
"Screencap of {} at {}",
descriptor, formatted_timestamp
));
attachments.push(image_attachment);
}
if let Some(duration) = conf.capture_audio_duration {
let audio_data = media::take_audio_clip(file, timestamp, duration)
.await
.context("Failed to take audio clip")?;
let audio_attachment = eggbug::Attachment::new(
audio_data,
format!("{} @{}.mp3", descriptor, formatted_timestamp),
String::from("audio/mpeg"),
eggbug::MediaMetadata::Audio {
artist: show.title.clone(),
title: descriptor.clone(),
},
);
attachments.push(audio_attachment);
}
let mut tags = show.tags.clone();
tags.extend_from_slice(&conf.global_tags);
@ -134,14 +166,19 @@ async fn post_random_screencap<R: Rng>(
.create_post(
&conf.cohost_page,
&mut eggbug::Post {
content_warnings: vec![descriptor],
attachments: vec![attachment],
content_warnings: if conf.cohost_cw {
vec![descriptor]
} else {
vec![]
},
attachments,
tags,
draft: conf.cohost_draft,
adult_content: show.eighteen_plus.unwrap_or(conf.eighteen_plus),
headline: String::new(),
markdown: String::new(),
metadata: None,
ask: None,
},
)
.await

View file

@ -4,6 +4,7 @@ use ffmpeg_next::{
media::Type,
};
use lazy_static::lazy_static;
use log::debug;
use regex::Regex;
use tempfile::tempdir;
use tokio::{fs, process::Command};
@ -14,19 +15,20 @@ lazy_static! {
static ref SUBTITLE_FORBID_REGEX: Regex = Regex::new("(?i)sign|song").unwrap();
}
pub struct VideoInfo {
pub struct MediaInfo {
pub duration_secs: f64,
// The index among the subtitle streams, not among the streams in general
pub subtitle_stream_index: Option<usize>,
}
pub fn get_video_info<P: AsRef<Path>>(
pub fn get_media_info<P: AsRef<Path>>(
source: &P,
subtitle_lang: Option<&str>,
) -> anyhow::Result<VideoInfo> {
let ctx = input(source).context("Failed to load video file")?;
) -> anyhow::Result<MediaInfo> {
let ctx = input(source).context("Failed to load media file")?;
let duration_secs = ctx.duration() as f64 / f64::from(ffmpeg_next::ffi::AV_TIME_BASE);
debug!("{:?}", ctx.metadata());
let subtitle_stream_index = subtitle_lang.and_then(|lang| {
ctx.streams()
@ -54,53 +56,40 @@ pub fn get_video_info<P: AsRef<Path>>(
.map(|(idx, _)| idx)
});
Ok(VideoInfo {
Ok(MediaInfo {
duration_secs,
subtitle_stream_index,
})
}
pub async fn take_screencap<P: AsRef<Path>>(
async fn take_ffmpeg_capture<P, F>(
source: &P,
timestamp_secs: f64,
subtitle_stream_index: Option<usize>,
) -> anyhow::Result<Vec<u8>> {
let ext = source.as_ref().extension().and_then(|s| s.to_str());
if ext != Some("mkv") && ext != Some("mp4") {
output_ext: &str,
apply_args: F,
) -> anyhow::Result<Vec<u8>>
where
P: AsRef<Path>,
F: FnOnce(&mut Command, &Path, &Path),
{
let input_ext = source.as_ref().extension().and_then(|s| s.to_str());
if input_ext.map(|e| e.chars().all(|c| c.is_ascii_alphanumeric())) != Some(true) {
return Err(anyhow!(
"Video file {} had unexpected file extension",
"Media file {} had unexpected file extension",
source.as_ref().display()
));
}
let tmp_dir = tempdir()
.context("Failed to create temporary directory for ffmpeg input and output files")?;
let link_path = tmp_dir.path().join(format!("in.{}", ext.unwrap()));
let link_path = tmp_dir.path().join(format!("in.{}", input_ext.unwrap()));
fs::symlink(source, &link_path)
.await
.context("Failed to create symlink for video file")?;
let dest_path = tmp_dir.path().join("out.png");
.context("Failed to create symlink for input file")?;
let dest_path = tmp_dir.path().join(format!("out.{}", output_ext));
let mut cmd = Command::new("ffmpeg");
cmd.arg("-ss")
.arg(format!("{:.2}", timestamp_secs))
.arg("-copyts")
.arg("-i")
.arg(&link_path);
if let Some(idx) = subtitle_stream_index {
cmd.arg("-filter_complex").arg(format!(
"[0:v]subtitles={}:si={}",
link_path.to_string_lossy(),
idx
));
}
cmd.args(["-vframes", "1"])
.args(["-loglevel", "quiet"])
.arg("-y")
.arg(&dest_path);
apply_args(&mut cmd, &link_path, &dest_path);
let status = cmd
.status()
@ -117,3 +106,50 @@ pub async fn take_screencap<P: AsRef<Path>>(
.await
.with_context(|| format!("Failed to read ffmpeg output file {}", dest_path.display()))
}
/// Captures a single video frame from `source` by running ffmpeg.
///
/// * `source` - path of the media file to capture from.
/// * `timestamp_secs` - position to seek to, in seconds; passed to ffmpeg
///   rounded to two decimal places.
/// * `subtitle_stream_index` - when `Some`, a `subtitles` filter selecting
///   that subtitle stream index is added so subtitles are rendered onto the
///   frame.
///
/// The output file is given a `png` extension. The actual ffmpeg invocation
/// and reading of the output are delegated to `take_ffmpeg_capture`, whose
/// result is returned unchanged.
pub async fn take_screencap<P: AsRef<Path>>(
    source: &P,
    timestamp_secs: f64,
    subtitle_stream_index: Option<usize>,
) -> anyhow::Result<Vec<u8>> {
    let seek_position = format!("{:.2}", timestamp_secs);

    take_ffmpeg_capture(source, "png", |ffmpeg, input, output| {
        // Seek before opening the input, keeping the original timestamps.
        ffmpeg
            .args(["-ss", seek_position.as_str(), "-copyts", "-i"])
            .arg(input);

        // The subtitles filter reads the subtitle stream from the input file itself.
        let subtitle_filter = subtitle_stream_index
            .map(|idx| format!("[0:v]subtitles={}:si={}", input.to_string_lossy(), idx));
        if let Some(filter) = subtitle_filter {
            ffmpeg.arg("-filter_complex").arg(filter);
        }

        // Emit exactly one frame, silently, overwriting any existing output.
        ffmpeg
            .args(["-vframes", "1", "-loglevel", "quiet", "-y"])
            .arg(output);
    })
    .await
}
/// Extracts an audio clip from `source` by running ffmpeg.
///
/// * `source` - path of the media file to capture from.
/// * `timestamp_secs` - start of the clip, in seconds; passed to ffmpeg
///   rounded to two decimal places.
/// * `duration_secs` - length of the clip, in seconds; also rounded to two
///   decimal places.
///
/// The output file is given an `mp3` extension. The actual ffmpeg invocation
/// and reading of the output are delegated to `take_ffmpeg_capture`, whose
/// result is returned unchanged.
pub async fn take_audio_clip<P: AsRef<Path>>(
    source: &P,
    timestamp_secs: f64,
    duration_secs: f64,
) -> anyhow::Result<Vec<u8>> {
    let clip_start = format!("{:.2}", timestamp_secs);
    let clip_length = format!("{:.2}", duration_secs);

    take_ffmpeg_capture(source, "mp3", |ffmpeg, input, output| {
        // Both -ss and -t precede -i, so they are given as input options.
        ffmpeg
            .args(["-ss", clip_start.as_str(), "-t", clip_length.as_str(), "-i"])
            .arg(input)
            .args(["-loglevel", "quiet", "-y"])
            .arg(output);
    })
    .await
}

View file

@ -6,8 +6,9 @@ use std::{
use anyhow::{anyhow, Context};
use log::{debug, error};
use regex::Regex;
use serde::Deserialize;
use serde_with::{serde_as, KeyValueMap};
use serde_with::{serde_as, DisplayFromStr, KeyValueMap};
mod enumeration;
@ -17,6 +18,8 @@ pub struct Show {
pub title: String,
pub path: PathBuf,
#[serde(default)]
pub custom_episodes: Option<CustomEpisodes>,
#[serde(default)]
pub tags: Vec<String>,
#[serde(default)]
pub parts: HashMap<u32, String>,
@ -26,6 +29,15 @@ pub struct Show {
pub eighteen_plus: Option<bool>,
}
/// Per-show configuration for deriving episode names from file names with a
/// regex instead of relying on automatic episode-number detection.
#[serde_as]
#[derive(Deserialize)]
pub struct CustomEpisodes {
/// Text prepended to whatever the regex captures when the episode is
/// displayed. Defaults to the empty string when omitted from the config.
#[serde(default)]
pub prefix: String,
/// Pattern matched against each file name; the episode name is taken from
/// the capture group named `episode`, and files that do not match are
/// ignored. Deserialized via `DisplayFromStr`, i.e. presumably parsed from
/// its string form in the shows file.
#[serde_as(as = "DisplayFromStr")]
pub regex: Regex,
}
fn default_weight() -> f32 {
1.0
}
@ -36,11 +48,12 @@ pub type Shows = Vec<Show>;
#[derive(Deserialize)]
struct ShowsWrapper(#[serde_as(as = "KeyValueMap<_>")] Shows);
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum EpisodeNumber {
Standalone,
SingleSeason(u32),
MultiSeason(u32, u32),
Custom(String),
}
type Episodes = HashMap<EpisodeNumber, PathBuf>;
@ -53,7 +66,7 @@ pub fn load<P: AsRef<Path>>(shows_file: P) -> anyhow::Result<Shows> {
.0)
}
pub fn display_show_episode(show: &Show, episode: EpisodeNumber) -> String {
pub fn display_show_episode(show: &Show, episode: &EpisodeNumber) -> String {
match episode {
EpisodeNumber::Standalone => show.title.to_string(),
EpisodeNumber::SingleSeason(n) => format!("{} episode {}", show.title, n),
@ -61,10 +74,17 @@ pub fn display_show_episode(show: &Show, episode: EpisodeNumber) -> String {
"{} {} episode {}",
show.title,
show.parts
.get(&season)
.get(season)
.unwrap_or(&format!("season {}", season)),
ep
),
EpisodeNumber::Custom(s) => {
show.custom_episodes
.as_ref()
.map(|c| c.prefix.clone())
.unwrap_or_default()
+ s
}
}
}
@ -110,8 +130,24 @@ impl Show {
})
.filter_map(|r| r.transpose())
.collect::<anyhow::Result<Vec<PathBuf>>>()?;
enumeration::enumerate_episodes(files)
.ok_or(anyhow!("Could not detect any episode numbering scheme"))
if let Some(CustomEpisodes { ref regex, .. }) = self.custom_episodes {
files
.into_iter()
.filter_map(|f| -> Option<anyhow::Result<(EpisodeNumber, PathBuf)>> {
let episode_name = regex
.captures(f.file_name().unwrap().to_str().unwrap())?
.name("episode")
.map(|m| m.as_str().to_string())
.ok_or(anyhow!(
"Failed to find capture group `episode` in episode regex"
));
Some(episode_name.map(|n| (EpisodeNumber::Custom(n), f)))
})
.collect::<anyhow::Result<Episodes>>()
} else {
enumeration::enumerate_episodes(files)
.ok_or(anyhow!("Could not detect any episode numbering scheme"))
}
} else {
Err(anyhow!("The show's path is not a file or a directory"))
}