support fanza works with manual metadata input

This commit is contained in:
xenofem 2024-01-29 04:11:55 -05:00
parent 309f19574d
commit a76e3eec83

View file

@ -21,6 +21,7 @@ import requests
# Matches a run of one or more ASCII digits anywhere in a string.
NUMBER_REGEX = re.compile('[0-9]+')
# DLSite work IDs: "BJ" or "RJ" followed by one or more digits (e.g. RJ123456).
DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
# FANZA work IDs: "d_" followed by one or more digits (e.g. d_123456).
FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
# Lowercase, dot-prefixed file suffixes treated as images.
# NOTE(review): presumably compared against lowercased path suffixes — confirm at the use sites.
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
@ -52,6 +53,29 @@ def extract(args):
if args.remove:
zip_path.unlink()
def manual_input_metadata(work_id):
    """Prompt the user on stdin for the metadata of a work.

    Used for works whose metadata cannot be fetched automatically.

    Free-text answers are stripped of surrounding whitespace. A blank (or
    whitespace-only) answer for the circle or the series yields ``None``.
    Authors and tags are entered as comma-separated lists; empty items are
    dropped. The publication date is re-prompted until it parses as an ISO
    date and is stored normalised as ``yyyy-mm-dd``, the same format that
    ``date.isoformat()`` produces.

    Args:
        work_id: Identifier of the work; echoed in the prompt and stored
            under the ``"id"`` key.

    Returns:
        A dict with the keys ``id``, ``title``, ``circle``, ``authors``,
        ``tags``, ``date``, ``description`` and ``series``. ``authors`` and
        ``tags`` are lists of strings; ``circle`` and ``series`` are either a
        string or ``None``.

    Raises:
        EOFError: Propagated from ``input()`` if stdin is closed.
    """
    # Local import so this function does not depend on the module's import block.
    import datetime

    def split_list(answer):
        # "a, b,,c " -> ["a", "b", "c"]
        return [item.strip() for item in answer.split(',') if item.strip()]

    print(f"Don't know how to fetch metadata for {work_id}, input manually:")

    title = input('Title: ').strip()
    # Strip before the `or None` fallback so that "   " counts as "no value".
    circle = input('Circle [None]: ').strip() or None
    authors = split_list(input('Authors (comma-separated): '))
    tags = split_list(input('Tags (comma-separated): '))

    # Keep asking until the date is valid; a malformed date would otherwise
    # be stored verbatim.
    while True:
        raw_date = input('Pub date (yyyy-mm-dd): ').strip()
        try:
            date = datetime.date.fromisoformat(raw_date).isoformat()
        except ValueError:
            print(f"Invalid date {raw_date!r}, expected yyyy-mm-dd")
            continue
        break

    description = input('Description: ').strip()
    series = input('Series [None]: ').strip() or None

    return {
        "id": work_id,
        "title": title,
        "circle": circle,
        "authors": authors,
        "tags": tags,
        "date": date,
        "description": description,
        "series": series,
    }
async def fetch_async(args):
con = sqlite3.connect(args.destdir / 'meta.db')
cur = con.cursor()
@ -71,36 +95,44 @@ async def fetch_async(args):
if res.fetchone() is not None:
continue
if not DLSITE_ID_REGEX.fullmatch(work_id):
print(f"{work_id} doesn't seem to be from DLSite, not attempting to fetch metadata")
if DLSITE_ID_REGEX.fullmatch(work_id):
print(f'Fetching DLSite metadata for {work_id}')
dlsite_metadata = await api.get_work(work_id)
db_row = {
"id": work_id,
"title": dlsite_metadata.work_name,
"circle": dlsite_metadata.circle,
"date": dlsite_metadata.regist_date.date().isoformat(),
"description": dlsite_metadata.description,
"series": dlsite_metadata.series,
}
authors = dlsite_metadata.author or []
tags = dlsite_metadata.genre or []
thumbnail_url = dlsite_metadata.work_image
if thumbnail_url.startswith('//'):
thumbnail_url = 'https:' + thumbnail_url
elif FANZA_ID_REGEX.fullmatch(work_id):
db_row = manual_input_metadata(work_id)
authors = db_row.pop('authors')
tags = db_row.pop('tags')
thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg'
else:
print(f"Don't know how to fetch metadata for {work_id}, skipping")
continue
print(f'Fetching metadata for {work_id}')
metadata = await api.get_work(work_id)
cur.execute(
"INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)",
{
"id": work_id,
"title": metadata.work_name,
"circle": metadata.circle,
"date": metadata.regist_date.date().isoformat(),
"description": metadata.description,
"series": metadata.series,
},
db_row,
)
cur.executemany(
"INSERT INTO authors VALUES(:author, :work)",
[{ "author": author, "work": work_id } for author in (metadata.author or [])],
[{ "author": author, "work": work_id } for author in authors],
)
cur.executemany(
"INSERT INTO tags VALUES(:tag, :work)",
[{ "tag": tag, "work": work_id } for tag in (metadata.genre or [])],
[{ "tag": tag, "work": work_id } for tag in tags],
)
thumbnail_url = metadata.work_image
if thumbnail_url.startswith('//'):
thumbnail_url = 'https:' + thumbnail_url
ext = url_file_ext(thumbnail_url)
dest_file = thumbnails_dir / (work_id + ext)
print(f'Downloading thumbnail for {work_id} from {thumbnail_url}')