Compare commits
2 commits
3ca4dcc807
...
a76e3eec83
Author | SHA1 | Date | |
---|---|---|---|
xenofem | a76e3eec83 | ||
xenofem | 309f19574d |
|
@ -20,6 +20,9 @@ import requests
|
||||||
|
|
||||||
NUMBER_REGEX = re.compile('[0-9]+')
|
NUMBER_REGEX = re.compile('[0-9]+')
|
||||||
|
|
||||||
|
DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
|
||||||
|
FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
|
||||||
|
|
||||||
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
|
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
|
||||||
|
|
||||||
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
|
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
|
||||||
|
@ -50,6 +53,29 @@ def extract(args):
|
||||||
if args.remove:
|
if args.remove:
|
||||||
zip_path.unlink()
|
zip_path.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
def _split_comma_list(raw):
    """Split a comma-separated answer into trimmed, non-empty items."""
    return [item.strip() for item in raw.split(',') if item.strip()]


def _prompt_iso_date(prompt):
    """Prompt until the answer parses as an ISO date; return it as yyyy-mm-dd."""
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from datetime import date

    while True:
        answer = input(prompt).strip()
        try:
            # Round-trip through date so the stored value is always in the
            # canonical yyyy-mm-dd form, whatever accepted variant was typed.
            return date.fromisoformat(answer).isoformat()
        except ValueError:
            print(f'Invalid date {answer!r}, expected yyyy-mm-dd')


def manual_input_metadata(work_id):
    """Interactively collect metadata for a work that cannot be fetched.

    Prompts on stdin for each field. Every answer is stripped of surrounding
    whitespace; a blank (or whitespace-only) circle or series becomes None,
    and the publication date is re-requested until it is a valid ISO date.

    Args:
        work_id: Identifier of the work; echoed in the prompt and stored
            under the "id" key.

    Returns:
        A dict with the keys "id", "title", "circle", "authors", "tags",
        "date", "description" and "series". "authors" and "tags" are lists
        of non-empty strings; "circle" and "series" are a string or None.
    """
    print(f"Don't know how to fetch metadata for {work_id}, input manually:")

    title = input('Title: ').strip()
    # `or None` after strip() so that an answer of only spaces counts as blank.
    circle = input('Circle [None]: ').strip() or None
    authors = _split_comma_list(input('Authors (comma-separated): '))
    tags = _split_comma_list(input('Tags (comma-separated): '))
    date = _prompt_iso_date('Pub date (yyyy-mm-dd): ')
    description = input('Description: ').strip()
    series = input('Series [None]: ').strip() or None

    return {
        "id": work_id,
        "title": title,
        "circle": circle,
        "authors": authors,
        "tags": tags,
        "date": date,
        "description": description,
        "series": series,
    }
|
||||||
|
|
||||||
async def fetch_async(args):
|
async def fetch_async(args):
|
||||||
con = sqlite3.connect(args.destdir / 'meta.db')
|
con = sqlite3.connect(args.destdir / 'meta.db')
|
||||||
cur = con.cursor()
|
cur = con.cursor()
|
||||||
|
@ -69,32 +95,44 @@ async def fetch_async(args):
|
||||||
if res.fetchone() is not None:
|
if res.fetchone() is not None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(f'Fetching metadata for {work_id}')
|
if DLSITE_ID_REGEX.fullmatch(work_id):
|
||||||
metadata = await api.get_work(work_id)
|
print(f'Fetching DLSite metadata for {work_id}')
|
||||||
|
dlsite_metadata = await api.get_work(work_id)
|
||||||
|
db_row = {
|
||||||
|
"id": work_id,
|
||||||
|
"title": dlsite_metadata.work_name,
|
||||||
|
"circle": dlsite_metadata.circle,
|
||||||
|
"date": dlsite_metadata.regist_date.date().isoformat(),
|
||||||
|
"description": dlsite_metadata.description,
|
||||||
|
"series": dlsite_metadata.series,
|
||||||
|
}
|
||||||
|
authors = dlsite_metadata.author or []
|
||||||
|
tags = dlsite_metadata.genre or []
|
||||||
|
thumbnail_url = dlsite_metadata.work_image
|
||||||
|
if thumbnail_url.startswith('//'):
|
||||||
|
thumbnail_url = 'https:' + thumbnail_url
|
||||||
|
elif FANZA_ID_REGEX.fullmatch(work_id):
|
||||||
|
db_row = manual_input_metadata(work_id)
|
||||||
|
authors = db_row.pop('authors')
|
||||||
|
tags = db_row.pop('tags')
|
||||||
|
thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg'
|
||||||
|
else:
|
||||||
|
print(f"Don't know how to fetch metadata for {work_id}, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)",
|
"INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)",
|
||||||
{
|
db_row,
|
||||||
"id": work_id,
|
|
||||||
"title": metadata.work_name,
|
|
||||||
"circle": metadata.circle,
|
|
||||||
"date": metadata.regist_date.date().isoformat(),
|
|
||||||
"description": metadata.description,
|
|
||||||
"series": metadata.series,
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
cur.executemany(
|
cur.executemany(
|
||||||
"INSERT INTO authors VALUES(:author, :work)",
|
"INSERT INTO authors VALUES(:author, :work)",
|
||||||
[{ "author": author, "work": work_id } for author in (metadata.author or [])],
|
[{ "author": author, "work": work_id } for author in authors],
|
||||||
)
|
)
|
||||||
cur.executemany(
|
cur.executemany(
|
||||||
"INSERT INTO tags VALUES(:tag, :work)",
|
"INSERT INTO tags VALUES(:tag, :work)",
|
||||||
[{ "tag": tag, "work": work_id } for tag in (metadata.genre or [])],
|
[{ "tag": tag, "work": work_id } for tag in tags],
|
||||||
)
|
)
|
||||||
|
|
||||||
thumbnail_url = metadata.work_image
|
|
||||||
if thumbnail_url.startswith('//'):
|
|
||||||
thumbnail_url = 'https:' + thumbnail_url
|
|
||||||
ext = url_file_ext(thumbnail_url)
|
ext = url_file_ext(thumbnail_url)
|
||||||
dest_file = thumbnails_dir / (work_id + ext)
|
dest_file = thumbnails_dir / (work_id + ext)
|
||||||
print(f'Downloading thumbnail for {work_id} from {thumbnail_url}')
|
print(f'Downloading thumbnail for {work_id} from {thumbnail_url}')
|
||||||
|
|
Loading…
Reference in a new issue