#!/usr/bin/env python3
"""Extract work zip archives and cache their DLsite metadata in SQLite.

Archives are read from ``ZIP_DIR`` and unpacked into one directory per work
under ``EXTRACT_DIR``; metadata for every directory found there is fetched
through ``dlsite_async`` and stored in the database at ``DB_PATH``.
"""

import asyncio
import os
import os.path
import sqlite3
import zipfile

from dlsite_async import DlsiteAPI

ZIP_DIR = "./zip"
EXTRACT_DIR = "./extract"
SITE_DIR = "./site"
DB_PATH = "./dlibrary.db"

# Encodings tried, in order, when decoding member names inside an archive.
_ZIP_METADATA_ENCODINGS = ("utf-8", "shift-jis", "shift-jisx0213")


def open_zipfile_with_encoding(path):
    """Open the zip archive at *path*, picking a workable filename encoding.

    Each encoding in ``_ZIP_METADATA_ENCODINGS`` is tried in order. A
    ``UnicodeDecodeError`` moves on to the next candidate; the last candidate
    is attempted unguarded, so its ``UnicodeDecodeError`` (if any) propagates
    to the caller.

    Returns:
        An open ``zipfile.ZipFile``; the caller is responsible for closing it.
    """
    *earlier_candidates, last_candidate = _ZIP_METADATA_ENCODINGS
    for encoding in earlier_candidates:
        try:
            return zipfile.ZipFile(path, metadata_encoding=encoding)
        except UnicodeDecodeError:
            continue
    return zipfile.ZipFile(path, metadata_encoding=last_candidate)


def extract(zip_path, remove=False):
    """Unpack one archive into ``EXTRACT_DIR/<work id>``.

    The work id is the archive's file name without its extension.

    Args:
        zip_path: Path of the zip archive to unpack.
        remove: When true, delete the archive after it has been extracted.

    Raises:
        FileExistsError: If the target directory already exists
            (``os.makedirs`` is called without ``exist_ok``).
    """
    work_id = os.path.splitext(os.path.basename(zip_path))[0]
    work_extract_path = os.path.join(EXTRACT_DIR, work_id)
    os.makedirs(work_extract_path)

    with open_zipfile_with_encoding(zip_path) as z:
        z.extractall(path=work_extract_path)

    if remove:
        os.remove(zip_path)


def extract_all(remove=False):
    """Extract every ``.zip`` file found directly inside ``ZIP_DIR``.

    Args:
        remove: Forwarded to :func:`extract`; delete each archive once it has
            been unpacked.
    """
    for f in os.listdir(ZIP_DIR):
        if f.endswith('.zip'):
            print(f'Extracting {f}')
            extract(os.path.join(ZIP_DIR, f), remove=remove)


def _create_tables(cur):
    """Create the ``works``, ``authors`` and ``tags`` tables if missing."""
    cur.execute("CREATE TABLE IF NOT EXISTS works(id TEXT PRIMARY KEY, title TEXT, circle TEXT, date TEXT, description TEXT, thumbnail_url TEXT)")
    cur.execute("CREATE TABLE IF NOT EXISTS authors(author TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")
    cur.execute("CREATE TABLE IF NOT EXISTS tags(tag TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")


def _is_cached(cur, work_id):
    """Return True when the ``works`` table already has a row for *work_id*."""
    res = cur.execute("SELECT id FROM works WHERE id = ?", (work_id,))
    return res.fetchone() is not None


def _store_work(cur, work_id, metadata):
    """Replace all stored rows for *work_id* with the values in *metadata*."""
    # Remove stale rows first: a plain INSERT for an id that is already
    # present would violate the PRIMARY KEY on works.id (the refresh case),
    # and the author/tag rows would otherwise pile up as duplicates.
    cur.execute("DELETE FROM authors WHERE work = ?", (work_id,))
    cur.execute("DELETE FROM tags WHERE work = ?", (work_id,))
    cur.execute("DELETE FROM works WHERE id = ?", (work_id,))

    # NOTE(review): assumes regist_date may be None for some works; a missing
    # date is stored as NULL rather than aborting the run. Confirm against
    # the dlsite_async Work model.
    regist_date = metadata.regist_date
    cur.execute(
        "INSERT INTO works VALUES(:id, :title, :circle, :date, :description, :thumbnail_url)",
        {
            "id": work_id,
            "title": metadata.work_name,
            "circle": metadata.circle,
            "date": regist_date.date().isoformat() if regist_date is not None else None,
            "description": metadata.description,
            "thumbnail_url": metadata.work_image,
        },
    )
    cur.executemany(
        "INSERT INTO authors VALUES(:author, :work)",
        [{"author": author, "work": work_id} for author in (metadata.author or [])],
    )
    cur.executemany(
        "INSERT INTO tags VALUES(:tag, :work)",
        [{"tag": tag, "work": work_id} for tag in (metadata.genre or [])],
    )


async def populate_db(refresh=False):
    """Fetch metadata for every entry in ``EXTRACT_DIR`` and store it.

    Each directory entry name is used as the work id passed to
    ``DlsiteAPI.get_work``. The result is written to the ``works``,
    ``authors`` and ``tags`` tables of the database at ``DB_PATH``.

    Args:
        refresh: When false, works that already have a row in ``works`` are
            skipped. When true, metadata is re-fetched and the stored rows
            for that work are replaced.
    """
    con = sqlite3.connect(DB_PATH)
    try:
        cur = con.cursor()
        _create_tables(cur)

        async with DlsiteAPI() as api:
            for work_id in os.listdir(EXTRACT_DIR):
                if not refresh and _is_cached(cur, work_id):
                    print(f'Metadata for {work_id} is already cached, skipping')
                    continue

                print(f'Fetching metadata for {work_id}')
                metadata = await api.get_work(work_id)
                _store_work(cur, work_id, metadata)
                # Commit per work so a failure on a later work does not
                # throw away the metadata already fetched in this run.
                con.commit()
    finally:
        # Closing without a commit discards any half-written work.
        con.close()


def process(work_id):
    """Run the per-work pipeline steps for *work_id* in order.

    NOTE(review): fetch_metadata, get_thumbnail, select_files and
    extract_files are not defined in the visible part of this file; confirm
    they exist elsewhere, otherwise this raises NameError when called.
    """
    fetch_metadata(work_id)
    get_thumbnail(work_id)
    select_files(work_id)
    extract_files(work_id)