starting dlsite curator script
This commit is contained in:
commit
12529498a1
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
env
|
90
dlibrary.py
Normal file
90
dlibrary.py
Normal file
|
@ -0,0 +1,90 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import os.path
|
||||||
|
import sqlite3
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
from dlsite_async import DlsiteAPI
|
||||||
|
|
||||||
|
# Directory that extract_all() scans for ``.zip`` archives.
ZIP_DIR = "./zip"
# Directory under which extract() creates one subdirectory per work and
# whose entries populate_db() treats as work IDs.
EXTRACT_DIR = "./extract"
# NOTE(review): not referenced by the code visible here; presumably the
# output directory for a generated site -- confirm.
SITE_DIR = "./site"
# SQLite database file opened by populate_db().
DB_PATH = "./dlibrary.db"
|
||||||
|
|
||||||
|
def open_zipfile_with_encoding(path):
|
||||||
|
try:
|
||||||
|
return zipfile.ZipFile(path, metadata_encoding="utf-8")
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
return zipfile.ZipFile(path, metadata_encoding="shift-jis")
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return zipfile.ZipFile(path, metadata_encoding="shift-jisx0213")
|
||||||
|
|
||||||
|
def extract(zip_path, remove=False):
    """Unpack one archive into its own directory under ``EXTRACT_DIR``.

    The target directory is named after the archive's file name with its
    extension stripped. It is created with ``os.makedirs`` without
    ``exist_ok``, so ``FileExistsError`` is raised if it already exists.

    Args:
        zip_path: Path of the archive to unpack.
        remove: When true, delete the archive once extraction has finished.
    """
    archive_name = os.path.basename(zip_path)
    work_id, _extension = os.path.splitext(archive_name)
    destination = os.path.join(EXTRACT_DIR, work_id)
    os.makedirs(destination)

    with open_zipfile_with_encoding(zip_path) as archive:
        archive.extractall(path=destination)

    if not remove:
        return
    os.remove(zip_path)
|
||||||
|
|
||||||
|
def extract_all(remove=False):
    """Extract every ``.zip`` file found directly inside ``ZIP_DIR``.

    Each archive is announced on stdout and then handed to ``extract``.

    Args:
        remove: Forwarded to ``extract``; when true each archive is deleted
            after it has been unpacked.
    """
    archive_names = [
        name for name in os.listdir(ZIP_DIR) if name.endswith('.zip')
    ]
    for archive_name in archive_names:
        print(f'Extracting {archive_name}')
        extract(os.path.join(ZIP_DIR, archive_name), remove=remove)
|
||||||
|
|
||||||
|
async def populate_db(refresh=False):
    """Fetch metadata for every extracted work and cache it in SQLite.

    Each entry name in ``EXTRACT_DIR`` is used as a work ID. For every work,
    the metadata returned by ``DlsiteAPI.get_work`` is written to the
    ``works``, ``authors`` and ``tags`` tables of the database at
    ``DB_PATH`` (the tables are created if missing).

    Args:
        refresh: When false (the default), works that already have a row in
            ``works`` are skipped. When true, metadata is fetched again for
            every work and replaces whatever was stored before.
    """
    con = sqlite3.connect(DB_PATH)
    try:
        cur = con.cursor()

        cur.execute("CREATE TABLE IF NOT EXISTS works(id TEXT PRIMARY KEY, title TEXT, circle TEXT, date TEXT, description TEXT, thumbnail_url TEXT)")
        cur.execute("CREATE TABLE IF NOT EXISTS authors(author TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")
        cur.execute("CREATE TABLE IF NOT EXISTS tags(tag TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")

        async with DlsiteAPI() as api:
            for work_id in os.listdir(EXTRACT_DIR):
                if not refresh:
                    res = cur.execute("SELECT id FROM works WHERE id = ?", (work_id,))
                    if res.fetchone() is not None:
                        print(f'Metadata for {work_id} is already cached, skipping')
                        continue

                print(f'Fetching metadata for {work_id}')
                metadata = await api.get_work(work_id)

                # Drop any rows left from an earlier run. Without this, a
                # refresh of an already-cached work violates the primary key
                # on works.id and duplicates its authors/tags rows. Child
                # rows go first so the foreign-key references stay valid.
                cur.execute("DELETE FROM authors WHERE work = ?", (work_id,))
                cur.execute("DELETE FROM tags WHERE work = ?", (work_id,))
                cur.execute("DELETE FROM works WHERE id = ?", (work_id,))

                cur.execute(
                    "INSERT INTO works VALUES(:id, :title, :circle, :date, :description, :thumbnail_url)",
                    {
                        "id": work_id,
                        "title": metadata.work_name,
                        "circle": metadata.circle,
                        "date": metadata.regist_date.date().isoformat(),
                        "description": metadata.description,
                        "thumbnail_url": metadata.work_image,
                    },
                )
                cur.executemany(
                    "INSERT INTO authors VALUES(:author, :work)",
                    [{"author": author, "work": work_id} for author in (metadata.author or [])],
                )
                cur.executemany(
                    "INSERT INTO tags VALUES(:tag, :work)",
                    [{"tag": tag, "work": work_id} for tag in (metadata.genre or [])],
                )
                # Commit per work so that a failure on a later work does not
                # discard the metadata already fetched for earlier ones.
                con.commit()
    finally:
        # Always release the database handle; uncommitted changes for a
        # work that failed part-way are discarded when the connection closes.
        con.close()
|
||||||
|
|
||||||
|
def process(work_id):
    """Run the per-work processing stages for ``work_id``, in order.

    The stages are ``fetch_metadata``, ``get_thumbnail``, ``select_files``
    and ``extract_files``; each is called with ``work_id`` as its only
    argument and its return value is ignored.

    NOTE(review): none of these helpers is defined in the part of the file
    visible here -- confirm they exist before calling this function.
    """
    # Stage 1: metadata.
    fetch_metadata(work_id)
    # Stage 2: thumbnail.
    get_thumbnail(work_id)
    # Stage 3: file selection.
    select_files(work_id)
    # Stage 4: file extraction.
    extract_files(work_id)
|
||||||
|
|
Loading…
Reference in a new issue