// Small HTTP service: for a numeric thread id it downloads the matching
// QuestDen thread page, keeps the posts written by the thread's first author,
// and returns them as a feed at `/{thread_id}/feed.xml`.
//
// NOTE(review): this copy of the file looks lossy. Several generic arguments
// are missing (`Option`, `web::Path`, `Option>`), and the raw-string templates
// in `feed` contain only placeholders, as if angle-bracketed text had been
// stripped. Restore those spans from the canonical source before building.
use actix_web::{
    get,
    web::{self, HttpResponse},
    App, HttpServer, Responder,
};
use lazy_static::lazy_static;
use scraper::{Html, Selector};
use time::{format_description::well_known::Rfc3339, OffsetDateTime, PrimitiveDateTime, UtcOffset};

// CSS selectors, parsed once on first use. The `unwrap()` calls only fire if
// one of these hard-coded selector strings fails to parse.
lazy_static! {
    // Selectors for the opening post, whose parts are direct children of `#delform`.
    static ref FIRST_TEXT_SEL: Selector = Selector::parse("#delform > blockquote").unwrap();
    static ref FIRST_IMAGE_SEL: Selector =
        Selector::parse("#delform > .postwidth > .filesize > a").unwrap();
    static ref FIRST_AUTHOR_SEL: Selector = Selector::parse("#delform > .postwidth > .uid").unwrap();
    static ref FIRST_TIME_SEL: Selector = Selector::parse("#delform > .postwidth > label").unwrap();
    // Each later post is a `table` directly under `#delform`; the remaining
    // selectors are evaluated relative to one such table in `get_posts`.
    static ref POST_SEL: Selector = Selector::parse("#delform > table").unwrap();
    static ref TEXT_SEL: Selector = Selector::parse("blockquote").unwrap();
    static ref IMAGE_SEL: Selector = Selector::parse(".postwidth > .filesize > a").unwrap();
    static ref AUTHOR_SEL: Selector = Selector::parse(".postwidth > .uid").unwrap();
    static ref TIME_SEL: Selector = Selector::parse(".postwidth > label").unwrap();
    static ref ID_SEL: Selector = Selector::parse(".postwidth > a:first-child").unwrap();
}

/// Layout of the timestamp text parsed by `extract_time`: year/month/day, a
/// short weekday name in parentheses, then 24-hour `hour:minute`
/// (e.g. `2021/01/31(Sun)13:45`).
const DATETIME_FORMAT: &[time::format_description::FormatItem] =
    time::macros::format_description!(
        "[year]/[month]/[day]([weekday repr:short])[hour repr:24]:[minute]"
    );

/// Starts the HTTP server on `127.0.0.1:10413` with the `feed` handler as its
/// only registered service.
///
/// A failure to bind the address is returned to the caller through `?`.
#[actix_web::main]
async fn main() -> std::io::Result<()> {
    HttpServer::new(|| App::new().service(feed))
        .bind("127.0.0.1:10413")?
        .run()
        .await
}

/// One scraped post of a thread, as produced by `get_posts`.
// NOTE(review): `image` and `id` are populated with owned strings in
// `get_posts`, so both were presumably `Option<String>` — confirm.
#[derive(Debug)]
struct Post {
    // Post body HTML after `ammonia::clean`.
    text: String,
    // Percent-encoded last path segment of the attachment link, if any.
    image: Option,
    // Digits-only `name` attribute of the post anchor; `None` for the opening post.
    id: Option,
    // Post timestamp; `OffsetDateTime::UNIX_EPOCH` when it could not be read.
    time: OffsetDateTime,
}

/// Handler for `GET /{thread_id}/feed.xml`.
///
/// Responds with 400 when `thread_id` fails `id_ok`, with 500 when
/// `get_posts` yields `None`, and otherwise with 200 and the rendered feed
/// text as the body.
// NOTE(review): the feed markup itself is not visible in this copy (see the
// note at the top of the file); presumably an Atom document, given the
// RFC 3339 timestamps — confirm against the intact template.
#[get("/{thread_id}/feed.xml")]
async fn feed(path: web::Path) -> impl Responder {
    let thread_id = path.into_inner();
    // Reject anything that is not purely digits before it is placed in the upstream URL.
    if !id_ok(&thread_id) {
        return HttpResponse::BadRequest().finish();
    }
    let thread = match get_posts(&thread_id).await {
        Some(posts) => posts,
        None => return HttpResponse::InternalServerError().finish(),
    };
    // Feed header: {0} is the thread id, {1} the timestamp of the last kept
    // post in RFC 3339 form (empty string if formatting fails).
    let mut feed = format!(
        r#" QuestDen Thread {0} {1} "#,
        thread_id,
        thread
            .last()
            .and_then(|p| p.time.format(&Rfc3339).ok())
            .unwrap_or_default()
    );
    // One chunk per post. Arguments: {0} post id (empty when absent),
    // {1} image fragment, {2} post text, {3} thread id, {4} post timestamp.
    // The image fragment and the text both go through `ammonia::clean_text`.
    // NOTE(review): the visible template never references {3}; `format!`
    // rejects unused arguments, so part of the template is missing here.
    for post in thread.iter() {
        feed.push_str(
            &format!(r#" {0} {4} {1}{2} "#,
                post.id.as_ref().map(|s| s.as_str()).unwrap_or(""),
                post.image.as_ref().map(|f| ammonia::clean_text(&format!(r#""#, f))).unwrap_or_default(),
                ammonia::clean_text(&post.text),
                thread_id,
                post.time.format(&Rfc3339).unwrap_or_default()
            )
        );
    }
    // NOTE(review): appends an empty string as written; presumably the
    // closing markup of the feed was lost from this literal — confirm.
    feed.push_str("");
    HttpResponse::Ok().body(feed)
}

/// Downloads `https://questden.org/kusaba/quest/res/{thread_id}.html` and
/// extracts the posts written by the author of the opening post.
///
/// Returns `None` when the request or reading its body fails, or when the
/// opening post's author or text element is not found. On success the vector
/// always starts with the opening post, followed by the matching replies in
/// document order.
// NOTE(review): the return type reads `Option>` here; the body returns
// `Some(posts)` with `posts` a `Vec` of `Post`, so presumably
// `Option<Vec<Post>>` — confirm.
async fn get_posts(thread_id: &str) -> Option> {
    let body = reqwest::get(format!(
        "https://questden.org/kusaba/quest/res/{}.html",
        thread_id
    ))
    .await
    .ok()?
    .text()
    .await
    .ok()?;
    let document = Html::parse_document(&body);
    // The opening post's author markup is the reference every reply is compared against.
    let first_author = document.select(&FIRST_AUTHOR_SEL).next()?.inner_html();
    let first_text = ammonia::clean(&document.select(&FIRST_TEXT_SEL).next()?.inner_html());
    let first_image = document
        .select(&FIRST_IMAGE_SEL)
        .next()
        .and_then(extract_image_url);
    // A missing or unparseable timestamp falls back to the Unix epoch instead of failing.
    let first_time = document
        .select(&FIRST_TIME_SEL)
        .next()
        .and_then(extract_time)
        .unwrap_or(OffsetDateTime::UNIX_EPOCH);
    let mut posts = vec![Post {
        text: first_text,
        image: first_image,
        id: None,
        time: first_time,
    }];
    for element in document.select(&POST_SEL) {
        // Skip replies whose author markup differs from the opening post's
        // (or that have no author element at all).
        let author = element.select(&AUTHOR_SEL).next().map(|el| el.inner_html());
        if author.as_ref() != Some(&first_author) {
            continue;
        }
        // Replies without a text element are dropped.
        let text = match element.select(&TEXT_SEL).next() {
            Some(el) => ammonia::clean(&el.inner_html()),
            None => continue,
        };
        let image = element
            .select(&IMAGE_SEL)
            .next()
            .and_then(extract_image_url);
        // Keep the anchor's `name` attribute only when it passes `id_ok`.
        let id = element
            .select(&ID_SEL)
            .next()
            .and_then(|el| el.value().attr("name"))
            .and_then(|n| {
                if id_ok(n) {
                    Some(String::from(n))
                } else {
                    None
                }
            });
        let time = element
            .select(&TIME_SEL)
            .next()
            .and_then(extract_time)
            .unwrap_or(OffsetDateTime::UNIX_EPOCH);
        posts.push(Post {
            text,
            image,
            id,
            time,
        });
    }
    Some(posts)
}

/// Returns `true` when every character of `id` is an ASCII digit.
// NOTE(review): `all` over an empty iterator is `true`, so the empty string
// is accepted as well — confirm whether callers rely on that.
fn id_ok(id: &str) -> bool {
    id.chars().all(|c| c.is_ascii_digit())
}

/// Takes the element's `href` attribute, keeps the text after the last `/`,
/// and returns it percent-encoded. Yields `None` when there is no `href`.
// NOTE(review): the body produces an owned string, so the return type was
// presumably `Option<String>` — confirm.
fn extract_image_url(el: scraper::ElementRef) -> Option {
    el.value()
        .attr("href")
        .and_then(|path| path.split('/').next_back())
        .map(|f| urlencoding::encode(f).into_owned())
}

/// Parses the element's last text node (trimmed) with `DATETIME_FORMAT`.
///
/// The parsed value carries no offset, so the current local offset is
/// attached, or UTC when the local offset cannot be obtained. Yields `None`
/// when the element has no text or the text does not match the format.
// NOTE(review): `current_local_offset()` can fail, in which case every
// timestamp is silently treated as UTC — confirm that is acceptable.
// NOTE(review): callers `unwrap_or` an `OffsetDateTime`, so the return type
// was presumably `Option<OffsetDateTime>` — confirm.
fn extract_time(el: scraper::ElementRef) -> Option {
    el.text()
        .last()
        .and_then(|d| PrimitiveDateTime::parse(d.trim(), &DATETIME_FORMAT).ok())
        .map(|t| t.assume_offset(UtcOffset::current_local_offset().unwrap_or(UtcOffset::UTC)))
}