Compare commits
No commits in common. "a76e3eec8375e013eeeeed0396fd4431cdd42a6c" and "3ca4dcc8070d16718fd137305fbff1667c08e445" have entirely different histories.
a76e3eec83
...
3ca4dcc807
|
@ -20,9 +20,6 @@ import requests
|
|||
|
||||
NUMBER_REGEX = re.compile('[0-9]+')
|
||||
|
||||
DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
|
||||
FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
|
||||
|
||||
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
|
||||
|
||||
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
|
||||
|
@ -53,29 +50,6 @@ def extract(args):
|
|||
if args.remove:
|
||||
zip_path.unlink()
|
||||
|
||||
|
||||
def _split_comma_list(raw):
    """Split a comma-separated string into stripped, non-empty items."""
    return [item.strip() for item in raw.split(',') if item.strip()]


def manual_input_metadata(work_id):
    """Interactively prompt the user for a work's metadata.

    Used for works whose metadata cannot be fetched automatically. Every
    field is read from standard input via ``input()``.

    Args:
        work_id: Identifier of the work; echoed in the prompt header and
            stored unchanged under the ``"id"`` key.

    Returns:
        A dict with the keys ``id``, ``title``, ``circle``, ``authors``,
        ``tags``, ``date``, ``description`` and ``series``. ``circle`` and
        ``series`` are ``None`` when left blank; ``authors`` and ``tags``
        are lists of stripped, non-empty strings; ``date`` is either an
        empty string (left blank) or an ISO ``yyyy-mm-dd`` string.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import datetime

    print(f"Don't know how to fetch metadata for {work_id}, input manually:")

    title = input('Title: ')
    circle = input('Circle [None]: ') or None
    authors = _split_comma_list(input('Authors (comma-separated): '))
    tags = _split_comma_list(input('Tags (comma-separated): '))

    # Re-prompt until the date parses, so a typo cannot end up stored as a
    # malformed date. A blank answer is still accepted as-is.
    while True:
        date = input('Pub date (yyyy-mm-dd): ').strip()
        if not date:
            break
        try:
            date = datetime.date.fromisoformat(date).isoformat()
        except ValueError:
            print(f'Invalid date {date!r}, expected yyyy-mm-dd')
        else:
            break

    description = input('Description: ')
    series = input('Series [None]: ') or None

    return {
        "id": work_id,
        "title": title,
        "circle": circle,
        "authors": authors,
        "tags": tags,
        "date": date,
        "description": description,
        "series": series,
    }
|
||||
|
||||
async def fetch_async(args):
|
||||
con = sqlite3.connect(args.destdir / 'meta.db')
|
||||
cur = con.cursor()
|
||||
|
@ -95,44 +69,32 @@ async def fetch_async(args):
|
|||
if res.fetchone() is not None:
|
||||
continue
|
||||
|
||||
if DLSITE_ID_REGEX.fullmatch(work_id):
|
||||
print(f'Fetching DLSite metadata for {work_id}')
|
||||
dlsite_metadata = await api.get_work(work_id)
|
||||
db_row = {
|
||||
"id": work_id,
|
||||
"title": dlsite_metadata.work_name,
|
||||
"circle": dlsite_metadata.circle,
|
||||
"date": dlsite_metadata.regist_date.date().isoformat(),
|
||||
"description": dlsite_metadata.description,
|
||||
"series": dlsite_metadata.series,
|
||||
}
|
||||
authors = dlsite_metadata.author or []
|
||||
tags = dlsite_metadata.genre or []
|
||||
thumbnail_url = dlsite_metadata.work_image
|
||||
if thumbnail_url.startswith('//'):
|
||||
thumbnail_url = 'https:' + thumbnail_url
|
||||
elif FANZA_ID_REGEX.fullmatch(work_id):
|
||||
db_row = manual_input_metadata(work_id)
|
||||
authors = db_row.pop('authors')
|
||||
tags = db_row.pop('tags')
|
||||
thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg'
|
||||
else:
|
||||
print(f"Don't know how to fetch metadata for {work_id}, skipping")
|
||||
continue
|
||||
print(f'Fetching metadata for {work_id}')
|
||||
metadata = await api.get_work(work_id)
|
||||
|
||||
cur.execute(
|
||||
"INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)",
|
||||
db_row,
|
||||
{
|
||||
"id": work_id,
|
||||
"title": metadata.work_name,
|
||||
"circle": metadata.circle,
|
||||
"date": metadata.regist_date.date().isoformat(),
|
||||
"description": metadata.description,
|
||||
"series": metadata.series,
|
||||
},
|
||||
)
|
||||
cur.executemany(
|
||||
"INSERT INTO authors VALUES(:author, :work)",
|
||||
[{ "author": author, "work": work_id } for author in authors],
|
||||
[{ "author": author, "work": work_id } for author in (metadata.author or [])],
|
||||
)
|
||||
cur.executemany(
|
||||
"INSERT INTO tags VALUES(:tag, :work)",
|
||||
[{ "tag": tag, "work": work_id } for tag in tags],
|
||||
[{ "tag": tag, "work": work_id } for tag in (metadata.genre or [])],
|
||||
)
|
||||
|
||||
thumbnail_url = metadata.work_image
|
||||
if thumbnail_url.startswith('//'):
|
||||
thumbnail_url = 'https:' + thumbnail_url
|
||||
ext = url_file_ext(thumbnail_url)
|
||||
dest_file = thumbnails_dir / (work_id + ext)
|
||||
print(f'Downloading thumbnail for {work_id} from {thumbnail_url}')
|
||||
|
|
Loading…
Reference in a new issue