91 lines
3 KiB
Python
91 lines
3 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import asyncio
|
|
import os
|
|
import os.path
|
|
import sqlite3
|
|
import zipfile
|
|
|
|
from dlsite_async import DlsiteAPI
|
|
|
|
ZIP_DIR = "./zip"
|
|
EXTRACT_DIR = "./extract"
|
|
SITE_DIR = "./site"
|
|
DB_PATH = "./dlibrary.db"
|
|
|
|
def open_zipfile_with_encoding(path):
|
|
try:
|
|
return zipfile.ZipFile(path, metadata_encoding="utf-8")
|
|
except UnicodeDecodeError:
|
|
pass
|
|
|
|
try:
|
|
return zipfile.ZipFile(path, metadata_encoding="shift-jis")
|
|
except UnicodeDecodeError:
|
|
pass
|
|
|
|
return zipfile.ZipFile(path, metadata_encoding="shift-jisx0213")
|
|
|
|
def extract(zip_path, remove=False):
    """Unpack one work archive into its own directory under EXTRACT_DIR.

    The destination directory is named after the archive's basename
    (the work id).  os.makedirs is called without exist_ok, so a
    FileExistsError is raised if the work was already extracted.
    When *remove* is true the source archive is deleted afterwards.
    """
    base_name = os.path.basename(zip_path)
    work_id, _ext = os.path.splitext(base_name)
    destination = os.path.join(EXTRACT_DIR, work_id)
    os.makedirs(destination)

    with open_zipfile_with_encoding(zip_path) as archive:
        archive.extractall(path=destination)

    if remove:
        os.remove(zip_path)
|
|
|
|
def extract_all(remove=False):
    """Extract every ``.zip`` archive found directly inside ZIP_DIR.

    Passes *remove* through to extract() so archives can optionally be
    deleted once unpacked.
    """
    for name in os.listdir(ZIP_DIR):
        if not name.endswith('.zip'):
            continue
        print(f'Extracting {name}')
        extract(os.path.join(ZIP_DIR, name), remove=remove)
|
|
|
|
async def populate_db(refresh=False):
    """Fetch DLsite metadata for every extracted work and cache it in sqlite.

    Scans EXTRACT_DIR for work-id directories, looks each id up via the
    DlsiteAPI client, and stores the result in the works/authors/tags
    tables at DB_PATH.  Already-cached works are skipped unless
    *refresh* is true.  Commits after each work so a crash mid-run
    loses at most the in-flight work.
    """
    con = sqlite3.connect(DB_PATH)
    # Ensure the connection is always released, even if the API client
    # raises mid-loop (the original leaked it on any exception).
    try:
        cur = con.cursor()

        cur.execute("CREATE TABLE IF NOT EXISTS works(id TEXT PRIMARY KEY, title TEXT, circle TEXT, date TEXT, description TEXT, thumbnail_url TEXT)")
        cur.execute("CREATE TABLE IF NOT EXISTS authors(author TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")
        cur.execute("CREATE TABLE IF NOT EXISTS tags(tag TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")

        async with DlsiteAPI() as api:
            for work_id in os.listdir(EXTRACT_DIR):
                if not refresh:
                    # Skip works whose metadata is already cached.
                    res = cur.execute("SELECT id FROM works WHERE id = ?", (work_id,))
                    if res.fetchone() is not None:
                        print(f'Metadata for {work_id} is already cached, skipping')
                        continue

                print(f'Fetching metadata for {work_id}')
                metadata = await api.get_work(work_id)
                cur.execute(
                    "INSERT INTO works VALUES(:id, :title, :circle, :date, :description, :thumbnail_url)",
                    {
                        "id": work_id,
                        "title": metadata.work_name,
                        "circle": metadata.circle,
                        "date": metadata.regist_date.date().isoformat(),
                        "description": metadata.description,
                        "thumbnail_url": metadata.work_image,
                    },
                )
                # author/genre may be None on some works; treat as empty.
                cur.executemany(
                    "INSERT INTO authors VALUES(:author, :work)",
                    [{ "author": author, "work": work_id } for author in (metadata.author or [])],
                )
                cur.executemany(
                    "INSERT INTO tags VALUES(:tag, :work)",
                    [{ "tag": tag, "work": work_id } for tag in (metadata.genre or [])],
                )
                con.commit()
    finally:
        con.close()
|
|
|
|
def process(work_id):
    """Run the full per-work pipeline for *work_id*, in order:
    metadata fetch, thumbnail download, file selection, extraction.

    NOTE(review): the four helpers are not defined in this file;
    presumably they live elsewhere in the project — confirm.
    """
    pipeline = (fetch_metadata, get_thumbnail, select_files, extract_files)
    for step in pipeline:
        step(work_id)
|
|
|