Compare commits

..

No commits in common. "a76e3eec8375e013eeeeed0396fd4431cdd42a6c" and "3ca4dcc8070d16718fd137305fbff1667c08e445" have entirely different histories.

View file

@ -20,9 +20,6 @@ import requests
NUMBER_REGEX = re.compile('[0-9]+') NUMBER_REGEX = re.compile('[0-9]+')
DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff'] IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store'] IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
@ -53,29 +50,6 @@ def extract(args):
if args.remove: if args.remove:
zip_path.unlink() zip_path.unlink()
def _split_comma_separated(raw):
    """Split *raw* on commas, trimming whitespace and dropping empty entries."""
    return [item.strip() for item in raw.split(',') if item.strip()]


def _prompt_iso_date(prompt):
    """Prompt until the user enters a blank line or a valid ISO date.

    Returns the date normalized to ``yyyy-mm-dd``, or ``''`` when the answer
    is left blank.
    """
    # Local import so this block does not depend on a file-level import.
    import datetime

    while True:
        raw = input(prompt).strip()
        if not raw:
            return raw
        try:
            # Round-trip through date so the stored value is always yyyy-mm-dd,
            # whichever ISO spelling the running Python version accepted.
            return datetime.date.fromisoformat(raw).isoformat()
        except ValueError:
            print(f"Invalid date {raw!r}, expected yyyy-mm-dd")


def manual_input_metadata(work_id):
    """Interactively ask the user for the metadata of *work_id*.

    Prompts on stdin for title, circle, authors, tags, publication date,
    description and series, in that order.

    Args:
        work_id: Identifier of the work; copied unchanged into the result.

    Returns:
        A dict with the keys ``id``, ``title``, ``circle``, ``authors``,
        ``tags``, ``date``, ``description`` and ``series``. ``circle`` and
        ``series`` are ``None`` when left blank; ``authors`` and ``tags`` are
        lists of stripped, non-empty strings; ``date`` is either ``''`` or a
        validated ``yyyy-mm-dd`` string.
    """
    print(f"Don't know how to fetch metadata for {work_id}, input manually:")

    title = input('Title: ')
    # An empty answer means "no value", which is represented as None.
    circle = input('Circle [None]: ') or None
    authors = _split_comma_separated(input('Authors (comma-separated): '))
    tags = _split_comma_separated(input('Tags (comma-separated): '))
    # Malformed dates are rejected and asked for again instead of being
    # returned as arbitrary text.
    date = _prompt_iso_date('Pub date (yyyy-mm-dd): ')
    description = input('Description: ')
    series = input('Series [None]: ') or None

    return {
        "id": work_id,
        "title": title,
        "circle": circle,
        "authors": authors,
        "tags": tags,
        "date": date,
        "description": description,
        "series": series,
    }
async def fetch_async(args): async def fetch_async(args):
con = sqlite3.connect(args.destdir / 'meta.db') con = sqlite3.connect(args.destdir / 'meta.db')
cur = con.cursor() cur = con.cursor()
@ -95,44 +69,32 @@ async def fetch_async(args):
if res.fetchone() is not None: if res.fetchone() is not None:
continue continue
if DLSITE_ID_REGEX.fullmatch(work_id): print(f'Fetching metadata for {work_id}')
print(f'Fetching DLSite metadata for {work_id}') metadata = await api.get_work(work_id)
dlsite_metadata = await api.get_work(work_id)
db_row = {
"id": work_id,
"title": dlsite_metadata.work_name,
"circle": dlsite_metadata.circle,
"date": dlsite_metadata.regist_date.date().isoformat(),
"description": dlsite_metadata.description,
"series": dlsite_metadata.series,
}
authors = dlsite_metadata.author or []
tags = dlsite_metadata.genre or []
thumbnail_url = dlsite_metadata.work_image
if thumbnail_url.startswith('//'):
thumbnail_url = 'https:' + thumbnail_url
elif FANZA_ID_REGEX.fullmatch(work_id):
db_row = manual_input_metadata(work_id)
authors = db_row.pop('authors')
tags = db_row.pop('tags')
thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg'
else:
print(f"Don't know how to fetch metadata for {work_id}, skipping")
continue
cur.execute( cur.execute(
"INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)", "INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)",
db_row, {
"id": work_id,
"title": metadata.work_name,
"circle": metadata.circle,
"date": metadata.regist_date.date().isoformat(),
"description": metadata.description,
"series": metadata.series,
},
) )
cur.executemany( cur.executemany(
"INSERT INTO authors VALUES(:author, :work)", "INSERT INTO authors VALUES(:author, :work)",
[{ "author": author, "work": work_id } for author in authors], [{ "author": author, "work": work_id } for author in (metadata.author or [])],
) )
cur.executemany( cur.executemany(
"INSERT INTO tags VALUES(:tag, :work)", "INSERT INTO tags VALUES(:tag, :work)",
[{ "tag": tag, "work": work_id } for tag in tags], [{ "tag": tag, "work": work_id } for tag in (metadata.genre or [])],
) )
thumbnail_url = metadata.work_image
if thumbnail_url.startswith('//'):
thumbnail_url = 'https:' + thumbnail_url
ext = url_file_ext(thumbnail_url) ext = url_file_ext(thumbnail_url)
dest_file = thumbnails_dir / (work_id + ext) dest_file = thumbnails_dir / (work_id + ext)
print(f'Downloading thumbnail for {work_id} from {thumbnail_url}') print(f'Downloading thumbnail for {work_id} from {thumbnail_url}')