diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index acc71b0..b2ecc06 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -21,6 +21,7 @@ import requests
 
 NUMBER_REGEX = re.compile('[0-9]+')
 DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
+FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
 
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
 
@@ -52,6 +53,29 @@ def extract(args):
         if args.remove:
             zip_path.unlink()
 
+
+def manual_input_metadata(work_id):
+    print(f"Don't know how to fetch metadata for {work_id}, input manually:")
+
+    title = input('Title: ')
+    circle = input('Circle [None]: ') or None
+    authors = [author.strip() for author in input('Authors (comma-separated): ').split(',') if author.strip()]
+    tags = [tag.strip() for tag in input('Tags (comma-separated): ').split(',') if tag.strip()]
+    date = input('Pub date (yyyy-mm-dd): ')
+    description = input('Description: ')
+    series = input('Series [None]: ') or None
+
+    return {
+        "id": work_id,
+        "title": title,
+        "circle": circle,
+        "authors": authors,
+        "tags": tags,
+        "date": date,
+        "description": description,
+        "series": series,
+    }
+
 async def fetch_async(args):
     con = sqlite3.connect(args.destdir / 'meta.db')
     cur = con.cursor()
@@ -71,36 +95,44 @@ async def fetch_async(args):
         if res.fetchone() is not None:
             continue
 
-        if not DLSITE_ID_REGEX.fullmatch(work_id):
-            print(f"{work_id} doesn't seem to be from DLSite, not attempting to fetch metadata")
+        if DLSITE_ID_REGEX.fullmatch(work_id):
+            print(f'Fetching DLSite metadata for {work_id}')
+            dlsite_metadata = await api.get_work(work_id)
+            db_row = {
+                "id": work_id,
+                "title": dlsite_metadata.work_name,
+                "circle": dlsite_metadata.circle,
+                "date": dlsite_metadata.regist_date.date().isoformat(),
+                "description": dlsite_metadata.description,
+                "series": dlsite_metadata.series,
+            }
+            authors = dlsite_metadata.author or []
+            tags = dlsite_metadata.genre or []
+            thumbnail_url = dlsite_metadata.work_image
+            if thumbnail_url.startswith('//'):
+                thumbnail_url = 'https:' + thumbnail_url
+        elif FANZA_ID_REGEX.fullmatch(work_id):
+            db_row = manual_input_metadata(work_id)
+            authors = db_row.pop('authors')
+            tags = db_row.pop('tags')
+            thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg'
+        else:
+            print(f"Don't know how to fetch metadata for {work_id}, skipping")
             continue
 
-        print(f'Fetching metadata for {work_id}')
-        metadata = await api.get_work(work_id)
-
         cur.execute(
             "INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)",
-            {
-                "id": work_id,
-                "title": metadata.work_name,
-                "circle": metadata.circle,
-                "date": metadata.regist_date.date().isoformat(),
-                "description": metadata.description,
-                "series": metadata.series,
-            },
+            db_row,
         )
         cur.executemany(
             "INSERT INTO authors VALUES(:author, :work)",
-            [{ "author": author, "work": work_id } for author in (metadata.author or [])],
+            [{ "author": author, "work": work_id } for author in authors],
         )
         cur.executemany(
             "INSERT INTO tags VALUES(:tag, :work)",
-            [{ "tag": tag, "work": work_id } for tag in (metadata.genre or [])],
+            [{ "tag": tag, "work": work_id } for tag in tags],
         )
 
-        thumbnail_url = metadata.work_image
-        if thumbnail_url.startswith('//'):
-            thumbnail_url = 'https:' + thumbnail_url
         ext = url_file_ext(thumbnail_url)
         dest_file = thumbnails_dir / (work_id + ext)
         print(f'Downloading thumbnail for {work_id} from {thumbnail_url}')