diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index b2ecc06..3d49ca0 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -20,9 +20,6 @@ import requests
 
 NUMBER_REGEX = re.compile('[0-9]+')
 
-DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
-FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
-
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
 
 IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
@@ -53,29 +50,6 @@ def extract(args):
         if args.remove:
             zip_path.unlink()
 
-
-def manual_input_metadata(work_id):
-    print(f"Don't know how to fetch metadata for {work_id}, input manually:")
-
-    title = input('Title: ')
-    circle = input('Circle [None]: ') or None
-    authors = [author.strip() for author in input('Authors (comma-separated): ').split(',') if author.strip()]
-    tags = [tag.strip() for tag in input('Tags (comma-separated): ').split(',') if tag.strip()]
-    date = input('Pub date (yyyy-mm-dd): ')
-    description = input('Description: ')
-    series = input('Series [None]: ') or None
-
-    return {
-        "id": work_id,
-        "title": title,
-        "circle": circle,
-        "authors": authors,
-        "tags": tags,
-        "date": date,
-        "description": description,
-        "series": series,
-    }
-
 async def fetch_async(args):
     con = sqlite3.connect(args.destdir / 'meta.db')
     cur = con.cursor()
@@ -95,44 +69,32 @@ async def fetch_async(args):
         if res.fetchone() is not None:
             continue
 
-        if DLSITE_ID_REGEX.fullmatch(work_id):
-            print(f'Fetching DLSite metadata for {work_id}')
-            dlsite_metadata = await api.get_work(work_id)
-            db_row = {
-                "id": work_id,
-                "title": dlsite_metadata.work_name,
-                "circle": dlsite_metadata.circle,
-                "date": dlsite_metadata.regist_date.date().isoformat(),
-                "description": dlsite_metadata.description,
-                "series": dlsite_metadata.series,
-            }
-            authors = dlsite_metadata.author or []
-            tags = dlsite_metadata.genre or []
-            thumbnail_url = dlsite_metadata.work_image
-            if thumbnail_url.startswith('//'):
-                thumbnail_url = 'https:' + thumbnail_url
-        elif FANZA_ID_REGEX.fullmatch(work_id):
-            db_row = manual_input_metadata(work_id)
-            authors = db_row.pop('authors')
-            tags = db_row.pop('tags')
-            thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg'
-        else:
-            print(f"Don't know how to fetch metadata for {work_id}, skipping")
-            continue
+        print(f'Fetching metadata for {work_id}')
+        metadata = await api.get_work(work_id)
 
         cur.execute(
             "INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)",
-            db_row,
+            {
+                "id": work_id,
+                "title": metadata.work_name,
+                "circle": metadata.circle,
+                "date": metadata.regist_date.date().isoformat(),
+                "description": metadata.description,
+                "series": metadata.series,
+            },
         )
         cur.executemany(
             "INSERT INTO authors VALUES(:author, :work)",
-            [{ "author": author, "work": work_id } for author in authors],
+            [{ "author": author, "work": work_id } for author in (metadata.author or [])],
         )
         cur.executemany(
             "INSERT INTO tags VALUES(:tag, :work)",
-            [{ "tag": tag, "work": work_id } for tag in tags],
+            [{ "tag": tag, "work": work_id } for tag in (metadata.genre or [])],
         )
 
+        thumbnail_url = metadata.work_image
+        if thumbnail_url.startswith('//'):
+            thumbnail_url = 'https:' + thumbnail_url
         ext = url_file_ext(thumbnail_url)
         dest_file = thumbnails_dir / (work_id + ext)
         print(f'Downloading thumbnail for {work_id} from {thumbnail_url}')
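
Note: the unified code path keeps the protocol-relative URL handling that the DLsite branch previously did inline on work_image. A minimal sketch of that normalization, assuming the same '//'-prefixed URLs the patch checks for (the normalize_thumbnail_url helper is hypothetical and not part of this patch):

    def normalize_thumbnail_url(url: str) -> str:
        # The work_image value can be protocol-relative ('//host/path.jpg');
        # prepend an explicit scheme so the thumbnail can be downloaded directly.
        if url.startswith('//'):
            return 'https:' + url
        return url

    # Example: '//example.com/thumb.jpg' -> 'https://example.com/thumb.jpg'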