questden frontend

xenofem 2022-02-20 04:09:40 -05:00
commit 0a2c67f47c
4 changed files with 2200 additions and 0 deletions

.gitignore (vendored, new file, 1 addition)

@@ -0,0 +1 @@
/target

Cargo.lock (generated, new file, 2012 additions)

File diff suppressed because it is too large.

Cargo.toml (new file, 15 additions)

@@ -0,0 +1,15 @@
[package]
name = "mspaify"
version = "0.1.0"
edition = "2021"
authors = ["xenofem <xenofem@xeno.science>"]

[dependencies]
actix-web = { version = "4.0.0-rc.3" }
ammonia = "3.1"
lazy_static = "1.4"
scraper = "0.12"
reqwest = { version = "0.11", features = ["rustls-tls"], default-features = false }
time = { version = "0.3", features = ["local-offset", "macros", "formatting", "parsing"] }
tokio = { version = "1", features = ["full"] }
urlencoding = "2.1"

src/main.rs (new file, 172 additions)

@@ -0,0 +1,172 @@
use actix_web::{
get,
web::{self, HttpResponse},
App, HttpServer, Responder,
};
use lazy_static::lazy_static;
use scraper::{Html, Selector};
use time::{format_description::well_known::Rfc3339, OffsetDateTime, PrimitiveDateTime, UtcOffset};
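
// CSS selectors for scraping a QuestDen thread page: the opening post
// ("FIRST_*") sits directly under #delform, while replies are nested in
// <table> elements.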
lazy_static! {
static ref FIRST_TEXT_SEL: Selector = Selector::parse("#delform > blockquote").unwrap();
static ref FIRST_IMAGE_SEL: Selector =
Selector::parse("#delform > .postwidth > .filesize > a").unwrap();
static ref FIRST_AUTHOR_SEL: Selector =
Selector::parse("#delform > .postwidth > .uid").unwrap();
static ref FIRST_TIME_SEL: Selector = Selector::parse("#delform > .postwidth > label").unwrap();
static ref POST_SEL: Selector = Selector::parse("#delform > table").unwrap();
static ref TEXT_SEL: Selector = Selector::parse("blockquote").unwrap();
static ref IMAGE_SEL: Selector = Selector::parse(".postwidth > .filesize > a").unwrap();
static ref AUTHOR_SEL: Selector = Selector::parse(".postwidth > .uid").unwrap();
static ref TIME_SEL: Selector = Selector::parse(".postwidth > label").unwrap();
static ref ID_SEL: Selector = Selector::parse(".postwidth > a:first-child").unwrap();
}
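
// Timestamp format used on QuestDen posts, e.g. "2022/02/20(Sun)04:09".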
const DATETIME_FORMAT: &[time::format_description::FormatItem] = time::macros::format_description!(
"[year]/[month]/[day]([weekday repr:short])[hour repr:24]:[minute]"
);
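
// Serve the feed endpoint, bound to localhost only.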
#[actix_web::main]
async fn main() -> std::io::Result<()> {
HttpServer::new(|| App::new().service(feed))
.bind("127.0.0.1:10413")?
.run()
.await
}
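
// One post extracted from the thread: sanitized HTML text, an optional image
// filename, an optional post id (None for the opening post), and a timestamp.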
#[derive(Debug)]
struct Post {
text: String,
image: Option<String>,
id: Option<String>,
time: OffsetDateTime,
}
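
// GET /{thread_id}/feed.xml: render the scraped thread as an Atom feed.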
#[get("/{thread_id}/feed.xml")]
async fn feed(path: web::Path<String>) -> impl Responder {
let thread_id = path.into_inner();
if !id_ok(&thread_id) {
return HttpResponse::BadRequest().finish();
}
let thread = match get_posts(&thread_id).await {
Some(posts) => posts,
None => return HttpResponse::InternalServerError().finish(),
};
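    // Build the Atom feed header; <updated> comes from the newest post.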
let mut feed = format!(
r#"<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>QuestDen Thread {0}</title>
<link href="https://questden.org/kusaba/quest/res/{0}.html" />
<updated>{1}</updated>
"#,
thread_id,
thread
.last()
.and_then(|p| p.time.format(&Rfc3339).ok())
.unwrap_or_default()
);
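    // Each post becomes an <entry>; the post body and optional inline image
    // are escaped with ammonia so they embed safely as type="html" content.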
for post in thread.iter() {
feed.push_str(
&format!(r#"<entry>
<title>{0}</title>
<updated>{4}</updated>
<content type="html">{1}{2}</content>
<link rel="alternate" type="text/html" href="https://questden.org/kusaba/quest/res/{3}.html#{0}" />
</entry>
"#, post.id.as_ref().map(|s| s.as_str()).unwrap_or(""), post.image.as_ref().map(|f| ammonia::clean_text(&format!(r#"<img src="https://questden.org/kusaba/quest/src/{}" />"#, f))).unwrap_or_default(), ammonia::clean_text(&post.text), thread_id, post.time.format(&Rfc3339).unwrap_or_default()
)
);
}
feed.push_str("</feed>");
HttpResponse::Ok().body(feed)
}
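
// Fetch the thread's HTML and collect the opening post plus every reply made
// by the same author (matched on the poster uid).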
async fn get_posts(thread_id: &str) -> Option<Vec<Post>> {
let body = reqwest::get(format!(
"https://questden.org/kusaba/quest/res/{}.html",
thread_id
))
.await
.ok()?
.text()
.await
.ok()?;
let document = Html::parse_document(&body);
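    // The opening post is laid out differently from replies, so it has its
    // own set of selectors.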
let first_author = document.select(&FIRST_AUTHOR_SEL).next()?.inner_html();
let first_text = ammonia::clean(&document.select(&FIRST_TEXT_SEL).next()?.inner_html());
let first_image = document
.select(&FIRST_IMAGE_SEL)
.next()
.and_then(extract_image_url);
let first_time = document
.select(&FIRST_TIME_SEL)
.next()
.and_then(extract_time)
.unwrap_or(OffsetDateTime::UNIX_EPOCH);
let mut posts = vec![Post {
text: first_text,
image: first_image,
id: None,
time: first_time,
}];
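    // Walk the reply tables, keeping only posts whose uid matches the thread
    // author's.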
for element in document.select(&POST_SEL) {
let author = element.select(&AUTHOR_SEL).next().map(|el| el.inner_html());
if author.as_ref() != Some(&first_author) {
continue;
}
let text = match element.select(&TEXT_SEL).next() {
Some(el) => ammonia::clean(&el.inner_html()),
None => continue,
};
let image = element
.select(&IMAGE_SEL)
.next()
.and_then(extract_image_url);
let id = element
.select(&ID_SEL)
.next()
.and_then(|el| el.value().attr("name"))
.and_then(|n| {
if id_ok(n) {
Some(String::from(n))
} else {
None
}
});
let time = element
.select(&TIME_SEL)
.next()
.and_then(extract_time)
.unwrap_or(OffsetDateTime::UNIX_EPOCH);
posts.push(Post {
text,
image,
id,
time,
});
}
Some(posts)
}
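
// Thread and post ids are purely numeric; reject anything else.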
fn id_ok(id: &str) -> bool {
id.chars().all(|c| c.is_ascii_digit())
}
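
// Take the filename from an image link's href and percent-encode it for use
// in the image URL.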
fn extract_image_url(el: scraper::ElementRef) -> Option<String> {
el.value()
.attr("href")
.and_then(|path| path.split('/').next_back())
.map(|f| urlencoding::encode(f).into_owned())
}
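
// Parse a post's timestamp, interpreting it in the server's local offset and
// falling back to UTC if that can't be determined.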
fn extract_time(el: scraper::ElementRef) -> Option<OffsetDateTime> {
el.text()
.last()
.and_then(|d| PrimitiveDateTime::parse(d.trim(), &DATETIME_FORMAT).ok())
.map(|t| t.assume_offset(UtcOffset::current_local_offset().unwrap_or(UtcOffset::UTC)))
}