questden frontend

main
xenofem 2022-02-20 04:09:40 -05:00
commit 0a2c67f47c
4 changed files with 2200 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/target

2012
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

15
Cargo.toml Normal file
View File

@@ -0,0 +1,15 @@
[package]
name = "mspaify"
version = "0.1.0"
edition = "2021"
authors = ["xenofem <xenofem@xeno.science>"]
[dependencies]
actix-web = { version = "4.0.0-rc.3" }
ammonia = "3.1"
lazy_static = "1.4"
scraper = "0.12"
reqwest = { version = "0.11", features = ["rustls-tls"], default-features = false }
time = { version = "0.3", features = ["local-offset", "macros", "formatting", "parsing"] }
tokio = { version = "1", features = ["full"] }
urlencoding = "2.1"

172
src/main.rs Normal file
View File

@@ -0,0 +1,172 @@
use actix_web::{
get,
web::{self, HttpResponse},
App, HttpServer, Responder,
};
use lazy_static::lazy_static;
use scraper::{Html, Selector};
use time::{format_description::well_known::Rfc3339, OffsetDateTime, PrimitiveDateTime, UtcOffset};
lazy_static! {
static ref FIRST_TEXT_SEL: Selector = Selector::parse("#delform > blockquote").unwrap();
static ref FIRST_IMAGE_SEL: Selector =
Selector::parse("#delform > .postwidth > .filesize > a").unwrap();
static ref FIRST_AUTHOR_SEL: Selector =
Selector::parse("#delform > .postwidth > .uid").unwrap();
static ref FIRST_TIME_SEL: Selector = Selector::parse("#delform > .postwidth > label").unwrap();
static ref POST_SEL: Selector = Selector::parse("#delform > table").unwrap();
static ref TEXT_SEL: Selector = Selector::parse("blockquote").unwrap();
static ref IMAGE_SEL: Selector = Selector::parse(".postwidth > .filesize > a").unwrap();
static ref AUTHOR_SEL: Selector = Selector::parse(".postwidth > .uid").unwrap();
static ref TIME_SEL: Selector = Selector::parse(".postwidth > label").unwrap();
static ref ID_SEL: Selector = Selector::parse(".postwidth > a:first-child").unwrap();
}
const DATETIME_FORMAT: &[time::format_description::FormatItem] = time::macros::format_description!(
"[year]/[month]/[day]([weekday repr:short])[hour repr:24]:[minute]"
);
#[actix_web::main]
async fn main() -> std::io::Result<()> {
HttpServer::new(|| App::new().service(feed))
.bind("127.0.0.1:10413")?
.run()
.await
}
#[derive(Debug)]
struct Post {
text: String,
image: Option<String>,
id: Option<String>,
time: OffsetDateTime,
}
#[get("/{thread_id}/feed.xml")]
async fn feed(path: web::Path<String>) -> impl Responder {
let thread_id = path.into_inner();
if !id_ok(&thread_id) {
return HttpResponse::BadRequest().finish();
}
let thread = match get_posts(&thread_id).await {
Some(posts) => posts,
None => return HttpResponse::InternalServerError().finish(),
};
let mut feed = format!(
r#"<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>QuestDen Thread {0}</title>
<link href="https://questden.org/kusaba/quest/res/{0}.html" />
<updated>{1}</updated>
"#,
thread_id,
thread
.last()
.and_then(|p| p.time.format(&Rfc3339).ok())
.unwrap_or_default()
);
for post in thread.iter() {
feed.push_str(
&format!(r#"<entry>
<title>{0}</title>
<updated>{4}</updated>
<content type="html">{1}{2}</content>
<link rel="alternate" type="text/html" href="https://questden.org/kusaba/quest/res/{3}.html#{0}" />
</entry>
"#, post.id.as_ref().map(|s| s.as_str()).unwrap_or(""), post.image.as_ref().map(|f| ammonia::clean_text(&format!(r#"<img src="https://questden.org/kusaba/quest/src/{}" />"#, f))).unwrap_or_default(), ammonia::clean_text(&post.text), thread_id, post.time.format(&Rfc3339).unwrap_or_default()
)
);
}
feed.push_str("</feed>");
HttpResponse::Ok().body(feed)
}
async fn get_posts(thread_id: &str) -> Option<Vec<Post>> {
let body = reqwest::get(format!(
"https://questden.org/kusaba/quest/res/{}.html",
thread_id
))
.await
.ok()?
.text()
.await
.ok()?;
let document = Html::parse_document(&body);
let first_author = document.select(&FIRST_AUTHOR_SEL).next()?.inner_html();
let first_text = ammonia::clean(&document.select(&FIRST_TEXT_SEL).next()?.inner_html());
let first_image = document
.select(&FIRST_IMAGE_SEL)
.next()
.and_then(extract_image_url);
let first_time = document
.select(&FIRST_TIME_SEL)
.next()
.and_then(extract_time)
.unwrap_or(OffsetDateTime::UNIX_EPOCH);
let mut posts = vec![Post {
text: first_text,
image: first_image,
id: None,
time: first_time,
}];
for element in document.select(&POST_SEL) {
let author = element.select(&AUTHOR_SEL).next().map(|el| el.inner_html());
if author.as_ref() != Some(&first_author) {
continue;
}
let text = match element.select(&TEXT_SEL).next() {
Some(el) => ammonia::clean(&el.inner_html()),
None => continue,
};
let image = element
.select(&IMAGE_SEL)
.next()
.and_then(extract_image_url);
let id = element
.select(&ID_SEL)
.next()
.and_then(|el| el.value().attr("name"))
.and_then(|n| {
if id_ok(n) {
Some(String::from(n))
} else {
None
}
});
let time = element
.select(&TIME_SEL)
.next()
.and_then(extract_time)
.unwrap_or(OffsetDateTime::UNIX_EPOCH);
posts.push(Post {
text,
image,
id,
time,
});
}
Some(posts)
}
fn id_ok(id: &str) -> bool {
id.chars().all(|c| c.is_ascii_digit())
}
fn extract_image_url(el: scraper::ElementRef) -> Option<String> {
el.value()
.attr("href")
.and_then(|path| path.split('/').next_back())
.map(|f| urlencoding::encode(f).into_owned())
}
fn extract_time(el: scraper::ElementRef) -> Option<OffsetDateTime> {
el.text()
.last()
.and_then(|d| PrimitiveDateTime::parse(d.trim(), &DATETIME_FORMAT).ok())
.map(|t| t.assume_offset(UtcOffset::current_local_offset().unwrap_or(UtcOffset::UTC)))
}