starting dlsite curator script

This commit is contained in:
xenofem 2024-01-21 05:13:09 -05:00
commit 12529498a1
2 changed files with 91 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
env

90
dlibrary.py Normal file
View file

@ -0,0 +1,90 @@
#!/usr/bin/env python3
import asyncio
import os
import os.path
import sqlite3
import zipfile
from dlsite_async import DlsiteAPI
# Directory that extract_all() scans for downloaded ".zip" archives.
ZIP_DIR = "./zip"
# Directory under which extract() creates one sub-directory per work;
# populate_db() lists it to discover work ids.
EXTRACT_DIR = "./extract"
# NOTE(review): not referenced anywhere in the visible part of this file.
SITE_DIR = "./site"
# SQLite database file opened by populate_db().
DB_PATH = "./dlibrary.db"
def open_zipfile_with_encoding(path):
try:
return zipfile.ZipFile(path, metadata_encoding="utf-8")
except UnicodeDecodeError:
pass
try:
return zipfile.ZipFile(path, metadata_encoding="shift-jis")
except UnicodeDecodeError:
pass
return zipfile.ZipFile(path, metadata_encoding="shift-jisx0213")
def extract(zip_path, remove=False):
    """Unpack one archive into its own directory under ``EXTRACT_DIR``.

    The destination directory is named after the archive's file name with
    its extension stripped.

    Args:
        zip_path: Path of the archive to unpack.
        remove: When true, delete the archive once extraction has finished.

    Raises:
        FileExistsError: If the destination directory already exists
            (``os.makedirs`` is called without ``exist_ok``).
    """
    archive_name = os.path.basename(zip_path)
    work_id, _suffix = os.path.splitext(archive_name)
    destination = os.path.join(EXTRACT_DIR, work_id)

    # Deliberately no exist_ok: an existing directory aborts the extraction.
    os.makedirs(destination)

    with open_zipfile_with_encoding(zip_path) as archive:
        archive.extractall(path=destination)

    if not remove:
        return
    os.remove(zip_path)
def extract_all(remove=False):
    """Extract every ``.zip`` file found directly inside ``ZIP_DIR``.

    Args:
        remove: Passed through to ``extract``; when true each archive is
            deleted after it has been unpacked.
    """
    archive_names = (
        entry for entry in os.listdir(ZIP_DIR) if entry.endswith('.zip')
    )
    for archive_name in archive_names:
        print(f'Extracting {archive_name}')
        extract(os.path.join(ZIP_DIR, archive_name), remove=remove)
async def populate_db(refresh=False):
    """Fetch DLsite metadata for every extracted work and cache it in SQLite.

    Each entry name in ``EXTRACT_DIR`` is treated as a work id. For every
    work the ``works`` row and its ``authors`` / ``tags`` rows are written to
    the database at ``DB_PATH``; the tables are created on first use.

    Args:
        refresh: When false, works that already have a row in ``works`` are
            skipped. When true, metadata is fetched again for every work and
            the stored rows are replaced.
    """
    con = sqlite3.connect(DB_PATH)
    try:
        cur = con.cursor()
        cur.execute("CREATE TABLE IF NOT EXISTS works(id TEXT PRIMARY KEY, title TEXT, circle TEXT, date TEXT, description TEXT, thumbnail_url TEXT)")
        cur.execute("CREATE TABLE IF NOT EXISTS authors(author TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")
        cur.execute("CREATE TABLE IF NOT EXISTS tags(tag TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")

        async with DlsiteAPI() as api:
            for work_id in os.listdir(EXTRACT_DIR):
                if not refresh:
                    res = cur.execute("SELECT id FROM works WHERE id = ?", (work_id,))
                    if res.fetchone() is not None:
                        print(f'Metadata for {work_id} is already cached, skipping')
                        continue

                print(f'Fetching metadata for {work_id}')
                metadata = await api.get_work(work_id)

                # On refresh the work may already be stored: clear its child
                # rows so authors/tags are not duplicated, and replace the
                # works row instead of violating its primary key.
                cur.execute("DELETE FROM authors WHERE work = ?", (work_id,))
                cur.execute("DELETE FROM tags WHERE work = ?", (work_id,))
                cur.execute(
                    "INSERT OR REPLACE INTO works VALUES(:id, :title, :circle, :date, :description, :thumbnail_url)",
                    {
                        "id": work_id,
                        "title": metadata.work_name,
                        "circle": metadata.circle,
                        "date": metadata.regist_date.date().isoformat(),
                        "description": metadata.description,
                        "thumbnail_url": metadata.work_image,
                    },
                )
                cur.executemany(
                    "INSERT INTO authors VALUES(:author, :work)",
                    [{"author": author, "work": work_id} for author in (metadata.author or [])],
                )
                cur.executemany(
                    "INSERT INTO tags VALUES(:tag, :work)",
                    [{"tag": tag, "work": work_id} for tag in (metadata.genre or [])],
                )
                # Commit per work so a later failed fetch does not discard
                # the works that were already stored.
                con.commit()
    finally:
        # Always release the database handle; rows of a work that failed
        # part-way were never committed and are dropped here.
        con.close()
def process(work_id):
    """Run the four per-work steps for *work_id*, in the order listed below.

    NOTE(review): fetch_metadata, get_thumbnail, select_files and
    extract_files are not defined in the visible part of this file; confirm
    they exist before calling this function, otherwise it raises NameError.
    """
    fetch_metadata(work_id)
    get_thumbnail(work_id)
    select_files(work_id)
    extract_files(work_id)