questden frontend
This commit is contained in:
commit
0a2c67f47c
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
/target
|
2012
Cargo.lock
generated
Normal file
2012
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
15
Cargo.toml
Normal file
15
Cargo.toml
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
[package]
name = "mspaify"
version = "0.1.0"
edition = "2021"
authors = ["xenofem <xenofem@xeno.science>"]

[dependencies]
# NOTE(review): pinned to a 4.0 release candidate; the caret requirement also
# matches stable 4.x, but consider updating the listed version once convenient.
actix-web = { version = "4.0.0-rc.3" }
# HTML sanitization (used on scraped post bodies and for XML text escaping).
ammonia = "3.1"
lazy_static = "1.4"
# HTML parsing + CSS selectors for scraping thread pages.
scraper = "0.12"
# rustls-tls with default features off — presumably to avoid linking a native
# TLS library; verify before changing.
reqwest = { version = "0.11", features = ["rustls-tls"], default-features = false }
# "local-offset" is needed for UtcOffset::current_local_offset,
# "macros"/"formatting"/"parsing" for the timestamp format description.
time = { version = "0.3", features = ["local-offset", "macros", "formatting", "parsing"] }
tokio = { version = "1", features = ["full"] }
urlencoding = "2.1"
|
172
src/main.rs
Normal file
172
src/main.rs
Normal file
|
@ -0,0 +1,172 @@
|
||||||
|
use actix_web::{
|
||||||
|
get,
|
||||||
|
web::{self, HttpResponse},
|
||||||
|
App, HttpServer, Responder,
|
||||||
|
};
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
use time::{format_description::well_known::Rfc3339, OffsetDateTime, PrimitiveDateTime, UtcOffset};
|
||||||
|
|
||||||
|
lazy_static! {
    // Selectors for the thread's opening post, which sits directly under
    // #delform (replies are instead wrapped in <table> elements — see
    // POST_SEL below), so it needs its own absolute selectors.
    static ref FIRST_TEXT_SEL: Selector = Selector::parse("#delform > blockquote").unwrap();
    static ref FIRST_IMAGE_SEL: Selector =
        Selector::parse("#delform > .postwidth > .filesize > a").unwrap();
    static ref FIRST_AUTHOR_SEL: Selector =
        Selector::parse("#delform > .postwidth > .uid").unwrap();
    static ref FIRST_TIME_SEL: Selector = Selector::parse("#delform > .postwidth > label").unwrap();
    // Each reply post is one <table> under #delform; the selectors below are
    // evaluated relative to a single such element in get_posts.
    static ref POST_SEL: Selector = Selector::parse("#delform > table").unwrap();
    static ref TEXT_SEL: Selector = Selector::parse("blockquote").unwrap();
    static ref IMAGE_SEL: Selector = Selector::parse(".postwidth > .filesize > a").unwrap();
    static ref AUTHOR_SEL: Selector = Selector::parse(".postwidth > .uid").unwrap();
    static ref TIME_SEL: Selector = Selector::parse(".postwidth > label").unwrap();
    // The first anchor in .postwidth carries the post's numeric id in its
    // `name` attribute (read in get_posts).
    static ref ID_SEL: Selector = Selector::parse(".postwidth > a:first-child").unwrap();
}
|
||||||
|
|
||||||
|
// Format of QuestDen post timestamps, e.g. "2022/02/05(Sat)12:34".
// NOTE(review): assumes the site renders a short English weekday
// abbreviation and a 24-hour clock — confirm against live markup.
const DATETIME_FORMAT: &[time::format_description::FormatItem] = time::macros::format_description!(
    "[year]/[month]/[day]([weekday repr:short])[hour repr:24]:[minute]"
);
|
||||||
|
|
||||||
|
#[actix_web::main]
|
||||||
|
async fn main() -> std::io::Result<()> {
|
||||||
|
HttpServer::new(|| App::new().service(feed))
|
||||||
|
.bind("127.0.0.1:10413")?
|
||||||
|
.run()
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One post scraped from a QuestDen thread.
#[derive(Debug)]
struct Post {
    /// Sanitized (ammonia-cleaned) HTML body of the post.
    text: String,
    /// URL-encoded attachment filename, if the post has an image.
    image: Option<String>,
    /// Numeric post anchor id; `None` for the thread's opening post.
    id: Option<String>,
    /// Post timestamp; falls back to `UNIX_EPOCH` when parsing fails.
    time: OffsetDateTime,
}
|
||||||
|
|
||||||
|
#[get("/{thread_id}/feed.xml")]
|
||||||
|
async fn feed(path: web::Path<String>) -> impl Responder {
|
||||||
|
let thread_id = path.into_inner();
|
||||||
|
if !id_ok(&thread_id) {
|
||||||
|
return HttpResponse::BadRequest().finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
let thread = match get_posts(&thread_id).await {
|
||||||
|
Some(posts) => posts,
|
||||||
|
None => return HttpResponse::InternalServerError().finish(),
|
||||||
|
};
|
||||||
|
let mut feed = format!(
|
||||||
|
r#"<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>QuestDen Thread {0}</title>
|
||||||
|
<link href="https://questden.org/kusaba/quest/res/{0}.html" />
|
||||||
|
<updated>{1}</updated>
|
||||||
|
"#,
|
||||||
|
thread_id,
|
||||||
|
thread
|
||||||
|
.last()
|
||||||
|
.and_then(|p| p.time.format(&Rfc3339).ok())
|
||||||
|
.unwrap_or_default()
|
||||||
|
);
|
||||||
|
for post in thread.iter() {
|
||||||
|
feed.push_str(
|
||||||
|
&format!(r#"<entry>
|
||||||
|
<title>{0}</title>
|
||||||
|
<updated>{4}</updated>
|
||||||
|
<content type="html">{1}{2}</content>
|
||||||
|
<link rel="alternate" type="text/html" href="https://questden.org/kusaba/quest/res/{3}.html#{0}" />
|
||||||
|
</entry>
|
||||||
|
"#, post.id.as_ref().map(|s| s.as_str()).unwrap_or(""), post.image.as_ref().map(|f| ammonia::clean_text(&format!(r#"<img src="https://questden.org/kusaba/quest/src/{}" />"#, f))).unwrap_or_default(), ammonia::clean_text(&post.text), thread_id, post.time.format(&Rfc3339).unwrap_or_default()
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
feed.push_str("</feed>");
|
||||||
|
HttpResponse::Ok().body(feed)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetches a QuestDen thread page and scrapes it into a list of `Post`s.
///
/// Returns `None` if the HTTP request fails, the body cannot be read, or
/// the opening post is missing its author/text elements. Replies are kept
/// only when their author uid matches the opening post's author (i.e. only
/// the quest author's updates appear in the result); replies without a text
/// blockquote are skipped.
async fn get_posts(thread_id: &str) -> Option<Vec<Post>> {
    // `thread_id` is validated as digits-only by the caller before being
    // interpolated into the URL.
    let body = reqwest::get(format!(
        "https://questden.org/kusaba/quest/res/{}.html",
        thread_id
    ))
    .await
    .ok()?
    .text()
    .await
    .ok()?;
    let document = Html::parse_document(&body);

    // The opening post lives directly under #delform (unlike replies, which
    // are wrapped in <table> elements), hence the FIRST_* selectors.
    let first_author = document.select(&FIRST_AUTHOR_SEL).next()?.inner_html();
    // Sanitize scraped HTML before storing it.
    let first_text = ammonia::clean(&document.select(&FIRST_TEXT_SEL).next()?.inner_html());
    let first_image = document
        .select(&FIRST_IMAGE_SEL)
        .next()
        .and_then(extract_image_url);
    // A missing/unparseable timestamp degrades to the Unix epoch instead of
    // failing the whole scrape.
    let first_time = document
        .select(&FIRST_TIME_SEL)
        .next()
        .and_then(extract_time)
        .unwrap_or(OffsetDateTime::UNIX_EPOCH);

    let mut posts = vec![Post {
        text: first_text,
        image: first_image,
        // The opening post has no anchor id of its own.
        id: None,
        time: first_time,
    }];

    for element in document.select(&POST_SEL) {
        // Keep only replies whose uid matches the thread author's;
        // everything else is reader discussion.
        let author = element.select(&AUTHOR_SEL).next().map(|el| el.inner_html());
        if author.as_ref() != Some(&first_author) {
            continue;
        }
        // A reply without a text blockquote is not a real post; skip it.
        let text = match element.select(&TEXT_SEL).next() {
            Some(el) => ammonia::clean(&el.inner_html()),
            None => continue,
        };
        let image = element
            .select(&IMAGE_SEL)
            .next()
            .and_then(extract_image_url);
        // The post id comes from the anchor's `name` attribute; reject
        // anything non-numeric so it is safe to embed in feed URLs.
        let id = element
            .select(&ID_SEL)
            .next()
            .and_then(|el| el.value().attr("name"))
            .and_then(|n| {
                if id_ok(n) {
                    Some(String::from(n))
                } else {
                    None
                }
            });
        let time = element
            .select(&TIME_SEL)
            .next()
            .and_then(extract_time)
            .unwrap_or(OffsetDateTime::UNIX_EPOCH);
        posts.push(Post {
            text,
            image,
            id,
            time,
        });
    }

    Some(posts)
}
|
||||||
|
|
||||||
|
/// Returns true iff `id` is a non-empty string of ASCII digits, making it
/// safe to interpolate into URLs and XML without escaping.
fn id_ok(id: &str) -> bool {
    // `all` on an empty iterator is vacuously true, so the empty string must
    // be rejected explicitly — otherwise "" would validate and produce a
    // dangling "#" anchor in generated feed links.
    !id.is_empty() && id.chars().all(|c| c.is_ascii_digit())
}
|
||||||
|
|
||||||
|
fn extract_image_url(el: scraper::ElementRef) -> Option<String> {
|
||||||
|
el.value()
|
||||||
|
.attr("href")
|
||||||
|
.and_then(|path| path.split('/').next_back())
|
||||||
|
.map(|f| urlencoding::encode(f).into_owned())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_time(el: scraper::ElementRef) -> Option<OffsetDateTime> {
|
||||||
|
el.text()
|
||||||
|
.last()
|
||||||
|
.and_then(|d| PrimitiveDateTime::parse(d.trim(), &DATETIME_FORMAT).ok())
|
||||||
|
.map(|t| t.assume_offset(UtcOffset::current_local_offset().unwrap_or(UtcOffset::UTC)))
|
||||||
|
}
|
Loading…
Reference in a new issue