dlibrary/dlibrary.py

91 lines
3 KiB
Python
Raw Normal View History

2024-01-21 05:13:09 -05:00
#!/usr/bin/env python3
import asyncio
import os
import os.path
import sqlite3
import zipfile
from dlsite_async import DlsiteAPI
# Directory that extract_all() scans for ``.zip`` archives.
ZIP_DIR = "./zip"
# Root directory under which extract() creates one subdirectory per archive;
# populate_db() treats each entry name in it as a work id.
EXTRACT_DIR = "./extract"
# NOTE(review): not referenced in the visible part of this file; presumably
# the output directory for a generated site - confirm.
SITE_DIR = "./site"
# Path of the SQLite database file opened by populate_db().
DB_PATH = "./dlibrary.db"
def open_zipfile_with_encoding(path):
try:
return zipfile.ZipFile(path, metadata_encoding="utf-8")
except UnicodeDecodeError:
pass
try:
return zipfile.ZipFile(path, metadata_encoding="shift-jis")
except UnicodeDecodeError:
pass
return zipfile.ZipFile(path, metadata_encoding="shift-jisx0213")
def extract(zip_path, remove=False):
    """Unpack one archive into its own directory under ``EXTRACT_DIR``.

    The target directory is named after the archive's file name with its
    extension removed. It is created with ``os.makedirs`` without
    ``exist_ok``, so ``FileExistsError`` is raised if it already exists.

    Args:
        zip_path: Path of the archive to unpack.
        remove: When true, delete the archive after it has been extracted.
    """
    archive_name = os.path.basename(zip_path)
    work_id, _extension = os.path.splitext(archive_name)
    target_dir = os.path.join(EXTRACT_DIR, work_id)
    os.makedirs(target_dir)

    with open_zipfile_with_encoding(zip_path) as archive:
        archive.extractall(path=target_dir)

    if not remove:
        return
    os.remove(zip_path)
def extract_all(remove=False):
    """Extract every ``.zip`` file listed directly inside ``ZIP_DIR``.

    Entries whose names do not end in ``.zip`` are ignored. A progress line
    is printed before each archive is handed to ``extract``.

    Args:
        remove: Forwarded to ``extract``; when true each archive is deleted
            after extraction.
    """
    for entry in os.listdir(ZIP_DIR):
        if not entry.endswith('.zip'):
            continue
        print(f'Extracting {entry}')
        archive_path = os.path.join(ZIP_DIR, entry)
        extract(archive_path, remove=remove)
async def populate_db(refresh=False):
    """Fetch metadata for every extracted work and cache it in SQLite.

    Each entry name in ``EXTRACT_DIR`` is treated as a work id. The ``works``,
    ``authors`` and ``tags`` tables are created in the database at ``DB_PATH``
    if they do not exist yet, then one ``works`` row plus one row per author
    and per tag is stored for each work.

    Args:
        refresh: When false (the default), works that already have a row in
            ``works`` are skipped. When true, metadata is fetched again for
            every work and replaces whatever was cached for it.
    """
    con = sqlite3.connect(DB_PATH)
    try:
        cur = con.cursor()
        cur.execute("CREATE TABLE IF NOT EXISTS works(id TEXT PRIMARY KEY, title TEXT, circle TEXT, date TEXT, description TEXT, thumbnail_url TEXT)")
        cur.execute("CREATE TABLE IF NOT EXISTS authors(author TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")
        cur.execute("CREATE TABLE IF NOT EXISTS tags(tag TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")

        async with DlsiteAPI() as api:
            for work_id in os.listdir(EXTRACT_DIR):
                if not refresh:
                    res = cur.execute("SELECT id FROM works WHERE id = ?", (work_id,))
                    if res.fetchone() is not None:
                        print(f'Metadata for {work_id} is already cached, skipping')
                        continue

                print(f'Fetching metadata for {work_id}')
                metadata = await api.get_work(work_id)

                # On a refresh the work may already be cached. Drop its old
                # author/tag rows so they are not duplicated, and replace the
                # works row instead of violating the PRIMARY KEY on id.
                cur.execute("DELETE FROM authors WHERE work = ?", (work_id,))
                cur.execute("DELETE FROM tags WHERE work = ?", (work_id,))
                cur.execute(
                    "INSERT OR REPLACE INTO works VALUES(:id, :title, :circle, :date, :description, :thumbnail_url)",
                    {
                        "id": work_id,
                        "title": metadata.work_name,
                        "circle": metadata.circle,
                        "date": metadata.regist_date.date().isoformat(),
                        "description": metadata.description,
                        "thumbnail_url": metadata.work_image,
                    },
                )
                # author / genre are guarded with `or []` so a falsy value
                # simply inserts no rows.
                cur.executemany(
                    "INSERT INTO authors VALUES(:author, :work)",
                    [{"author": author, "work": work_id} for author in (metadata.author or [])],
                )
                cur.executemany(
                    "INSERT INTO tags VALUES(:tag, :work)",
                    [{"tag": tag, "work": work_id} for tag in (metadata.genre or [])],
                )
                # Commit once per work: all rows of a work land together, and a
                # failure on a later work does not discard the ones already stored.
                con.commit()
    finally:
        # Always release the database handle, even if a fetch or insert fails.
        con.close()
def process(work_id):
    """Run the four per-work steps for *work_id*, in the order written.

    NOTE(review): ``fetch_metadata``, ``get_thumbnail``, ``select_files`` and
    ``extract_files`` are not defined in the visible part of this file. Unless
    they are defined elsewhere, calling this function raises ``NameError`` on
    the first call - confirm.
    """
    fetch_metadata(work_id)
    get_thumbnail(work_id)
    select_files(work_id)
    extract_files(work_id)