support fanza works with manual metadata input

don't try to fetch dlsite metadata for non-dlsite works
2024-01-29 04:11:55 -05:00 · 2024-01-29 03:01:35 -05:00
1 changed files with 53 additions and 15 deletions
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@ -20,6 +20,9 @@ import requests
 NUMBER_REGEX = re.compile('[0-9]+')
 DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
 FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
 IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
@ -50,6 +53,29 @@ def extract(args):
        if args.remove:
            zip_path.unlink()
 def manual_input_metadata(work_id):
    """Interactively prompt for the metadata of a work that cannot be fetched.

    Reads each field from standard input via ``input()``.  Circle and series
    become ``None`` when left blank.  Authors and tags are entered as
    comma-separated lists; surrounding whitespace is stripped and empty
    entries are dropped.  The publication date is re-prompted until it parses
    as an ISO date, and is returned in canonical ``yyyy-mm-dd`` form.

    Args:
        work_id: Identifier of the work; copied verbatim into the result.

    Returns:
        A dict with the keys ``id``, ``title``, ``circle``, ``authors``,
        ``tags``, ``date``, ``description`` and ``series``.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import list.
    import datetime

    def prompt_list(prompt):
        # Split a comma-separated answer, discarding blank entries.
        return [item.strip() for item in input(prompt).split(',') if item.strip()]

    print(f"Don't know how to fetch metadata for {work_id}, input manually:")
    title = input('Title: ')
    circle = input('Circle [None]: ') or None
    authors = prompt_list('Authors (comma-separated): ')
    tags = prompt_list('Tags (comma-separated): ')

    # Keep asking until the date is valid, so a typo cannot end up in the
    # returned metadata; normalise to the yyyy-mm-dd form the prompt asks for.
    while True:
        raw_date = input('Pub date (yyyy-mm-dd): ').strip()
        try:
            date = datetime.date.fromisoformat(raw_date).isoformat()
        except ValueError:
            print(f"Invalid date {raw_date!r}, expected yyyy-mm-dd")
        else:
            break

    description = input('Description: ')
    series = input('Series [None]: ') or None

    return {
        "id": work_id,
        "title": title,
        "circle": circle,
        "authors": authors,
        "tags": tags,
        "date": date,
        "description": description,
        "series": series,
    }
 async def fetch_async(args):
    con = sqlite3.connect(args.destdir / 'meta.db')
    cur = con.cursor()
@ -69,32 +95,44 @@ async def fetch_async(args):
            if res.fetchone() is not None:
                continue
-            print(f'Fetching metadata for {work_id}')
+            if DLSITE_ID_REGEX.fullmatch(work_id):
-            metadata = await api.get_work(work_id)
+                print(f'Fetching DLSite metadata for {work_id}')
                dlsite_metadata = await api.get_work(work_id)
                db_row = {
                    "id": work_id,
                    "title": dlsite_metadata.work_name,
                    "circle": dlsite_metadata.circle,
                    "date": dlsite_metadata.regist_date.date().isoformat(),
                    "description": dlsite_metadata.description,
                    "series": dlsite_metadata.series,
                }
                authors = dlsite_metadata.author or []
                tags = dlsite_metadata.genre or []
                thumbnail_url = dlsite_metadata.work_image
                if thumbnail_url.startswith('//'):
                    thumbnail_url = 'https:' + thumbnail_url
            elif FANZA_ID_REGEX.fullmatch(work_id):
                db_row = manual_input_metadata(work_id)
                authors = db_row.pop('authors')
                tags = db_row.pop('tags')
                thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg'
            else:
                print(f"Don't know how to fetch metadata for {work_id}, skipping")
                continue
            cur.execute(
                "INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)",
-                {
+                db_row,
                    "id": work_id,
                    "title": metadata.work_name,
                    "circle": metadata.circle,
                    "date": metadata.regist_date.date().isoformat(),
                    "description": metadata.description,
                    "series": metadata.series,
                },
            )
            cur.executemany(
                "INSERT INTO authors VALUES(:author, :work)",
-                [{ "author": author, "work": work_id } for author in (metadata.author or [])],
+                [{ "author": author, "work": work_id } for author in authors],
            )
            cur.executemany(
                "INSERT INTO tags VALUES(:tag, :work)",
-                [{ "tag": tag, "work": work_id } for tag in (metadata.genre or [])],
+                [{ "tag": tag, "work": work_id } for tag in tags],
            )
            thumbnail_url = metadata.work_image
            if thumbnail_url.startswith('//'):
                thumbnail_url = 'https:' + thumbnail_url
            ext = url_file_ext(thumbnail_url)
            dest_file = thumbnails_dir / (work_id + ext)
            print(f'Downloading thumbnail for {work_id} from {thumbnail_url}')
Author	SHA1	Message	Date
xenofem	a76e3eec83	support fanza works with manual metadata input	2024-01-29 04:11:55 -05:00
xenofem	309f19574d	don't try to fetch dlsite metadata for non-dlsite works	2024-01-29 03:01:35 -05:00