support fanza works with manual metadata input

main
xenofem 2024-01-29 04:11:55 -05:00
parent 309f19574d
commit a76e3eec83
1 changed file with 50 additions and 18 deletions


@@ -21,6 +21,7 @@ import requests
 NUMBER_REGEX = re.compile('[0-9]+')
 DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
+FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
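
For reference, a minimal sketch of how these two patterns classify work directory names — the example IDs below are illustrative, not taken from the commit:

import re

DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
FANZA_ID_REGEX = re.compile('^d_[0-9]+$')

for work_id in ['RJ123456', 'BJ012345', 'd_123456', 'not-a-store-id']:
    if DLSITE_ID_REGEX.fullmatch(work_id):
        print(f'{work_id}: DLSite work, metadata fetched from the API')
    elif FANZA_ID_REGEX.fullmatch(work_id):
        print(f'{work_id}: FANZA work, metadata entered manually')
    else:
        print(f'{work_id}: unrecognized, skipped')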
@@ -52,6 +53,29 @@ def extract(args):
         if args.remove:
             zip_path.unlink()
+def manual_input_metadata(work_id):
+    print(f"Don't know how to fetch metadata for {work_id}, input manually:")
+    title = input('Title: ')
+    circle = input('Circle [None]: ') or None
+    authors = [author.strip() for author in input('Authors (comma-separated): ').split(',') if author.strip()]
+    tags = [tag.strip() for tag in input('Tags (comma-separated): ').split(',') if tag.strip()]
+    date = input('Pub date (yyyy-mm-dd): ')
+    description = input('Description: ')
+    series = input('Series [None]: ') or None
+    return {
+        "id": work_id,
+        "title": title,
+        "circle": circle,
+        "authors": authors,
+        "tags": tags,
+        "date": date,
+        "description": description,
+        "series": series,
+    }
 async def fetch_async(args):
     con = sqlite3.connect(args.destdir / 'meta.db')
     cur = con.cursor()
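
The new manual_input_metadata helper prompts on stdin and returns a dict shaped like a row of the works table, plus authors and tags lists that the caller pops off before the inserts. A quick way to see the resulting shape without typing at a prompt is to feed canned answers through builtins.input; this harness is only an illustration (it assumes manual_input_metadata from the diff above is in scope), not part of the repository:

import builtins

answers = iter([
    'Example Title',    # Title
    '',                 # Circle [None]
    'Alice, Bob',       # Authors (comma-separated)
    'comedy, fantasy',  # Tags (comma-separated)
    '2024-01-29',       # Pub date (yyyy-mm-dd)
    'A short blurb.',   # Description
    '',                 # Series [None]
])
builtins.input = lambda prompt='': next(answers)  # monkeypatch for this sketch only

row = manual_input_metadata('d_000000')  # hypothetical FANZA-style ID
# row -> {'id': 'd_000000', 'title': 'Example Title', 'circle': None,
#         'authors': ['Alice', 'Bob'], 'tags': ['comedy', 'fantasy'],
#         'date': '2024-01-29', 'description': 'A short blurb.', 'series': None}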
@@ -71,36 +95,44 @@ async def fetch_async(args):
             if res.fetchone() is not None:
                 continue
-            if not DLSITE_ID_REGEX.fullmatch(work_id):
-                print(f"{work_id} doesn't seem to be from DLSite, not attempting to fetch metadata")
+            if DLSITE_ID_REGEX.fullmatch(work_id):
+                print(f'Fetching DLSite metadata for {work_id}')
+                dlsite_metadata = await api.get_work(work_id)
+                db_row = {
+                    "id": work_id,
+                    "title": dlsite_metadata.work_name,
+                    "circle": dlsite_metadata.circle,
+                    "date": dlsite_metadata.regist_date.date().isoformat(),
+                    "description": dlsite_metadata.description,
+                    "series": dlsite_metadata.series,
+                }
+                authors = dlsite_metadata.author or []
+                tags = dlsite_metadata.genre or []
+                thumbnail_url = dlsite_metadata.work_image
+                if thumbnail_url.startswith('//'):
+                    thumbnail_url = 'https:' + thumbnail_url
+            elif FANZA_ID_REGEX.fullmatch(work_id):
+                db_row = manual_input_metadata(work_id)
+                authors = db_row.pop('authors')
+                tags = db_row.pop('tags')
+                thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg'
+            else:
+                print(f"Don't know how to fetch metadata for {work_id}, skipping")
                 continue
-            print(f'Fetching metadata for {work_id}')
-            metadata = await api.get_work(work_id)
             cur.execute(
                 "INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)",
-                {
-                    "id": work_id,
-                    "title": metadata.work_name,
-                    "circle": metadata.circle,
-                    "date": metadata.regist_date.date().isoformat(),
-                    "description": metadata.description,
-                    "series": metadata.series,
-                },
+                db_row,
             )
             cur.executemany(
                 "INSERT INTO authors VALUES(:author, :work)",
-                [{ "author": author, "work": work_id } for author in (metadata.author or [])],
+                [{ "author": author, "work": work_id } for author in authors],
             )
             cur.executemany(
                 "INSERT INTO tags VALUES(:tag, :work)",
-                [{ "tag": tag, "work": work_id } for tag in (metadata.genre or [])],
+                [{ "tag": tag, "work": work_id } for tag in tags],
             )
-            thumbnail_url = metadata.work_image
-            if thumbnail_url.startswith('//'):
-                thumbnail_url = 'https:' + thumbnail_url
             ext = url_file_ext(thumbnail_url)
             dest_file = thumbnails_dir / (work_id + ext)
             print(f'Downloading thumbnail for {work_id} from {thumbnail_url}')
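
For FANZA works the thumbnail URL is derived from the work ID rather than taken from API metadata. The download code after the print above is cut off in this hunk, so the following is only a guess at how fetching the image could look with requests (which the module already imports), not a copy of the repository's implementation:

from pathlib import Path

import requests

def fetch_fanza_thumbnail(work_id: str, thumbnails_dir: Path) -> Path:
    # URL pattern used by this commit for d_* works
    thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg'
    dest_file = thumbnails_dir / (work_id + '.jpg')
    resp = requests.get(thumbnail_url, timeout=30)
    resp.raise_for_status()  # surface HTTP errors instead of saving an error page
    dest_file.write_bytes(resp.content)
    return dest_file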