1.5.0: make preferred audio and subtitle languages configurable

2023-08-01 01:09:31 -04:00 · 2023-08-01 01:09:31 -04:00 · f584b17dd1
commit f584b17dd1
parent 0f75889a3e
6 changed files with 121 additions and 33 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1220,7 +1220,7 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"

 [[package]]
 name = "screencap-bot"
-version = "1.4.1"
+version = "1.5.0"
 dependencies = [
 "anyhow",
 "dotenvy",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "screencap-bot"
-version = "1.4.1"
+version = "1.5.0"
 edition = "2021"
 authors = ["xenofem <xenofem@xeno.science>"]
 license = "MIT"
--- a/README.md
+++ b/README.md
@ -18,15 +18,33 @@ screencap-bot is configured with the following environment variables,
 which can also be put in a `.env` file in the program's working
 directory:

- `SCREENCAP_BOT_CAPTURE_IMAGES`: whether to take screenshots (default: `true`)
- `SCREENCAP_BOT_CAPTURE_AUDIO_DURATION`: length of audio clips to capture, in seconds (default: unset, no audio capture)
- `SCREENCAP_BOT_SHOWS_FILE`: path of a YAML file specifying what shows to take captures from (default: `./shows.yaml`)
- `SCREENCAP_BOT_GLOBAL_TAGS`: tags to put on every post the bot makes, as a comma-separated list (eg `bot account,automated post,The Cohost Bot Feed`) (default: none)
+- `SCREENCAP_BOT_CAPTURE_IMAGES`: whether to take screenshots
+  (default: `true`)
+- `SCREENCAP_BOT_CAPTURE_AUDIO_DURATION`: length of audio clips to
+  capture, in seconds. must be at least 1; if unset, or set to 0,
+  audio will not be captured. (default: unset, no audio capture)
+- `SCREENCAP_BOT_SUBTITLE_LANGUAGE`: ISO-639-2 three-letter code for a
+  subtitle language to embed in screenshots. if this is set to an
+  empty string, or if a video doesn't have any subtitle track tagged
+  with this language, no subtitles will be embedded. (default: `eng`)
+- `SCREENCAP_BOT_AUDIO_LANGUAGE`: ISO-639-2 three-letter code for an
+  audio language to prefer when capturing audio clips. if this is
+  unset, set to an empty string, or if a media file doesn't have any
+  audio track tagged with this language, screencap-bot will choose an
+  arbitrary audio track. (default: unset)
+- `SCREENCAP_BOT_SHOWS_FILE`: path of a YAML file specifying what
+  shows to take captures from (default: `./shows.yaml`)
+- `SCREENCAP_BOT_GLOBAL_TAGS`: tags to put on every post the bot
+  makes, as a comma-separated list (eg `bot account,automated post,The
+  Cohost Bot Feed`) (default: none)
 - `SCREENCAP_BOT_POST_INTERVAL`: the interval between posts, in
  seconds (default: 0, post a single capture and then exit)
- `SCREENCAP_BOT_COHOST_EMAIL`: the email address the bot should use to log into cohost
- `SCREENCAP_BOT_COHOST_PASSWORD`: the password the bot should use to log into cohost
- `SCREENCAP_BOT_COHOST_PAGE`: the cohost page the bot should post from
+- `SCREENCAP_BOT_COHOST_EMAIL`: the email address the bot should use
+  to log into cohost
+- `SCREENCAP_BOT_COHOST_PASSWORD`: the password the bot should use to
+  log into cohost
+- `SCREENCAP_BOT_COHOST_PAGE`: the cohost page the bot should post
+  from
 - `SCREENCAP_BOT_COHOST_DRAFT`: whether to create cohost posts as
  drafts, eg for testing (default: `false`)
 - `SCREENCAP_BOT_COHOST_CW`: whether to CW posts with the episode
--- a/src/config.rs
+++ b/src/config.rs
@ -12,6 +12,8 @@ use anyhow::{anyhow, Context};
 pub struct Config {
    pub capture_images: bool,
    pub capture_audio_duration: Option<f64>,
+    pub subtitle_language: Option<String>,
+    pub audio_language: Option<String>,
    pub shows_file: PathBuf,
    pub global_tags: Vec<String>,
    pub post_interval: Duration,
@ -49,18 +51,55 @@ fn require_var(name: &str) -> anyhow::Result<String> {
    get_var(name)?.ok_or_else(|| anyhow!("{}{} must be set", VAR_PREFIX, name))
 }

+/// Reads the optional language-code variable `name` (looked up via `get_var`).
+///
+/// - Unset: returns whatever `default` produces.
+/// - Empty string: returns `Ok(None)`, i.e. the language preference is
+///   explicitly turned off.
+/// - Exactly three ASCII letters: returns the value unchanged.
+/// - Anything else: returns an error naming the offending variable.
+fn get_language_code_var<F: FnOnce() -> Option<String>>(
+    name: &str,
+    default: F,
+) -> anyhow::Result<Option<String>> {
+    match get_var(name)? {
+        None => Ok(default()),
+        Some(s) if s.is_empty() => Ok(None),
+        // Require three ASCII *letters*, not merely three ASCII bytes, so
+        // that values such as "12 " or "a-b" are reported as errors instead
+        // of being accepted as a language code.
+        Some(s) if s.len() == 3 && s.bytes().all(|b| b.is_ascii_alphabetic()) => Ok(Some(s)),
+        Some(_) => Err(anyhow!(
+            "{}{} must be an ISO-639-2 three-letter language code",
+            VAR_PREFIX,
+            name
+        )),
+    }
+}
+
 pub fn load() -> anyhow::Result<Config> {
    let capture_images = parse_var("CAPTURE_IMAGES")?.unwrap_or(true);

-    let capture_audio_duration = parse_var("CAPTURE_AUDIO_DURATION")?;
-    if let Some(d) = capture_audio_duration {
-        if d <= 0.0 {
-            return Err(anyhow!(
-                "{}CAPTURE_AUDIO_DURATION cannot be <= 0",
-                VAR_PREFIX
-            ));
+    let capture_audio_duration = match parse_var::<_, f64>("CAPTURE_AUDIO_DURATION")? {
+        Some(d) => {
+            if !d.is_finite() {
+                return Err(anyhow!(
+                    "non-finite float value for {}CAPTURE_AUDIO_DURATION",
+                    VAR_PREFIX
+                ));
+            } else if d >= 1.0 {
+                Some(d)
+            } else if d > 0.0 {
+                return Err(anyhow!(
+                    "cannot capture audio clips less than 1 second long"
+                ));
+            } else if d == 0.0 {
+                None
+            } else {
+                return Err(anyhow!(
+                    "{}CAPTURE_AUDIO_DURATION cannot be negative",
+                    VAR_PREFIX
+                ));
+            }
        }
-    }
+        None => None,
+    };

    if let (false, None) = (capture_images, capture_audio_duration) {
        return Err(anyhow!(
@ -71,6 +110,10 @@ pub fn load() -> anyhow::Result<Config> {
    Ok(Config {
        capture_images,
        capture_audio_duration,
+        subtitle_language: get_language_code_var("SUBTITLE_LANGUAGE", || {
+            Some(String::from("eng"))
+        })?,
+        audio_language: get_language_code_var("AUDIO_LANGUAGE", || None)?,
        shows_file: parse_var("SHOWS_FILE")?.unwrap_or_else(|| PathBuf::from("./shows.yaml")),
        global_tags: get_var("GLOBAL_TAGS")?
            .map(|s| s.split(',').map(String::from).collect())
--- a/src/main.rs
+++ b/src/main.rs
@ -89,8 +89,12 @@ async fn post_random_capture<R: Rng>(

    info!("Selected: {} - {}", descriptor, file.display());

-    let media_info = media::get_media_info(file, Some("eng"))
-        .with_context(|| format!("Failed to get info for media file {}", file.display()))?;
+    let media_info = media::get_media_info(
+        file,
+        conf.subtitle_language.as_deref(),
+        conf.audio_language.as_deref(),
+    )
+    .with_context(|| format!("Failed to get info for media file {}", file.display()))?;
    debug!(
        "Media duration: {}",
        format_timestamp(media_info.duration_secs, None)
@ -142,9 +146,10 @@ async fn post_random_capture<R: Rng>(
    }

    if let Some(duration) = conf.capture_audio_duration {
-        let audio_data = media::take_audio_clip(file, timestamp, duration)
-            .await
-            .context("Failed to take audio clip")?;
+        let audio_data =
+            media::take_audio_clip(file, timestamp, duration, media_info.audio_stream_index)
+                .await
+                .context("Failed to take audio clip")?;

        let audio_attachment = eggbug::Attachment::new(
            audio_data,
--- a/src/media.rs
+++ b/src/media.rs
@ -2,6 +2,7 @@ use anyhow::{anyhow, Context};
 use ffmpeg_next::{
    format::{input, stream::Disposition},
    media::Type,
+    Stream,
 };
 use lazy_static::lazy_static;
 use log::debug;
@ -19,11 +20,27 @@ pub struct MediaInfo {
    pub duration_secs: f64,
    // The index among the subtitle streams, not among the streams in general
    pub subtitle_stream_index: Option<usize>,
+    // The index among the audio streams, not among the streams in general
+    pub audio_stream_index: Option<usize>,
+}
+
+/// Iterates over the streams of `ctx` whose medium is `stream_type`, pairing
+/// each one with its position among the streams of that type (not its
+/// position among all streams). A stream is skipped when no codec context
+/// can be built from its parameters.
+fn indexed_streams(
+    ctx: &ffmpeg_next::format::context::common::Context,
+    stream_type: Type,
+) -> impl Iterator<Item = (usize, Stream<'_>)> {
+    use ffmpeg_next::codec::context::Context as CodecContext;
+
+    ctx.streams()
+        .filter(move |candidate| {
+            // A failed codec-context construction counts as "not this medium".
+            CodecContext::from_parameters(candidate.parameters())
+                .map_or(false, |codec| codec.medium() == stream_type)
+        })
+        // Enumerate only after filtering so the index is per-medium.
+        .enumerate()
+}

 pub fn get_media_info<P: AsRef<Path>>(
    source: &P,
    subtitle_lang: Option<&str>,
+    audio_lang: Option<&str>,
 ) -> anyhow::Result<MediaInfo> {
    let ctx = input(source).context("Failed to load media file")?;

@ -31,13 +48,7 @@ pub fn get_media_info<P: AsRef<Path>>(
    debug!("{:?}", ctx.metadata());

    let subtitle_stream_index = subtitle_lang.and_then(|lang| {
-        ctx.streams()
-            .filter(|stream| {
-                ffmpeg_next::codec::context::Context::from_parameters(stream.parameters())
-                    .map(|c| c.medium())
-                    == Ok(Type::Subtitle)
-            })
-            .enumerate()
+        indexed_streams(&ctx, Type::Subtitle)
            .filter(|(_, stream)| {
                let metadata = stream.metadata();
                if metadata.get("language") != Some(lang) {
@ -56,9 +67,16 @@ pub fn get_media_info<P: AsRef<Path>>(
            .map(|(idx, _)| idx)
    });

+    let audio_stream_index = audio_lang.and_then(|lang| {
+        indexed_streams(&ctx, Type::Audio)
+            .find(|(_, stream)| stream.metadata().get("language") == Some(lang))
+            .map(|(idx, _)| idx)
+    });
+
    Ok(MediaInfo {
        duration_secs,
        subtitle_stream_index,
+        audio_stream_index,
    })
 }

@ -139,6 +157,7 @@ pub async fn take_audio_clip<P: AsRef<Path>>(
    source: &P,
    timestamp_secs: f64,
    duration_secs: f64,
+    audio_stream_index: Option<usize>,
 ) -> anyhow::Result<Vec<u8>> {
    take_ffmpeg_capture(source, "mp3", |cmd, in_path, out_path| {
        cmd.arg("-ss")
@ -146,10 +165,13 @@ pub async fn take_audio_clip<P: AsRef<Path>>(
            .arg("-t")
            .arg(format!("{:.2}", duration_secs))
            .arg("-i")
-            .arg(in_path)
-            .args(["-loglevel", "quiet"])
-            .arg("-y")
-            .arg(out_path);
+            .arg(in_path);
+
+        if let Some(idx) = audio_stream_index {
+            cmd.arg("-map").arg(format!("0:a:{}", idx));
+        }
+
+        cmd.args(["-loglevel", "quiet"]).arg("-y").arg(out_path);
    })
    .await
 }