starting dlsite curator script
This commit is contained in:
commit
12529498a1
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@@ -0,0 +1 @@
|
|||
env
|
90
dlibrary.py
Normal file
90
dlibrary.py
Normal file
|
@@ -0,0 +1,90 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import os.path
|
||||
import sqlite3
|
||||
import zipfile
|
||||
|
||||
from dlsite_async import DlsiteAPI
|
||||
|
||||
ZIP_DIR = "./zip"
|
||||
EXTRACT_DIR = "./extract"
|
||||
SITE_DIR = "./site"
|
||||
DB_PATH = "./dlibrary.db"
|
||||
|
||||
def open_zipfile_with_encoding(path):
|
||||
try:
|
||||
return zipfile.ZipFile(path, metadata_encoding="utf-8")
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
return zipfile.ZipFile(path, metadata_encoding="shift-jis")
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
|
||||
return zipfile.ZipFile(path, metadata_encoding="shift-jisx0213")
|
||||
|
||||
def extract(zip_path, remove=False):
    """Unpack one work's archive into EXTRACT_DIR/<work id>.

    The work id is the archive's base filename without its extension.
    The target directory must not already exist — os.makedirs raises
    FileExistsError otherwise, which guards against extracting over a
    previous run's output.  When *remove* is true the source archive is
    deleted after a successful extraction.
    """
    base_name = os.path.basename(zip_path)
    work_id, _ = os.path.splitext(base_name)
    destination = os.path.join(EXTRACT_DIR, work_id)
    os.makedirs(destination)

    with open_zipfile_with_encoding(zip_path) as archive:
        archive.extractall(path=destination)

    if remove:
        os.remove(zip_path)
|
||||
|
||||
def extract_all(remove=False):
    """Extract every ``.zip`` archive found directly inside ZIP_DIR.

    *remove* is forwarded to extract(), so source archives can be
    deleted once they have been unpacked.
    """
    for entry in os.listdir(ZIP_DIR):
        if not entry.endswith('.zip'):
            continue
        print(f'Extracting {entry}')
        extract(os.path.join(ZIP_DIR, entry), remove=remove)
|
||||
|
||||
async def populate_db(refresh=False):
    """Fetch DLsite metadata for every extracted work and cache it in sqlite.

    Walks the work ids (directory names) under EXTRACT_DIR, fetches each
    work's metadata from the DLsite API, and records it in the ``works``,
    ``authors`` and ``tags`` tables of the database at DB_PATH.  Works
    already present in the database are skipped unless *refresh* is true,
    in which case their cached rows are replaced with fresh data.
    """
    con = sqlite3.connect(DB_PATH)
    try:
        cur = con.cursor()

        cur.execute("CREATE TABLE IF NOT EXISTS works(id TEXT PRIMARY KEY, title TEXT, circle TEXT, date TEXT, description TEXT, thumbnail_url TEXT)")
        cur.execute("CREATE TABLE IF NOT EXISTS authors(author TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")
        cur.execute("CREATE TABLE IF NOT EXISTS tags(tag TEXT, work TEXT, FOREIGN KEY(work) REFERENCES works(id))")

        async with DlsiteAPI() as api:
            for work_id in os.listdir(EXTRACT_DIR):
                if not refresh:
                    res = cur.execute("SELECT id FROM works WHERE id = ?", (work_id,))
                    if res.fetchone() is not None:
                        print(f'Metadata for {work_id} is already cached, skipping')
                        continue

                print(f'Fetching metadata for {work_id}')
                metadata = await api.get_work(work_id)
                # OR REPLACE so refresh=True overwrites the cached row; a
                # plain INSERT raised sqlite3.IntegrityError on the primary
                # key for any work already in the table.
                cur.execute(
                    "INSERT OR REPLACE INTO works VALUES(:id, :title, :circle, :date, :description, :thumbnail_url)",
                    {
                        "id": work_id,
                        "title": metadata.work_name,
                        "circle": metadata.circle,
                        "date": metadata.regist_date.date().isoformat(),
                        "description": metadata.description,
                        "thumbnail_url": metadata.work_image,
                    },
                )
                # Drop stale child rows first so refreshing a work does not
                # accumulate duplicate author/tag entries.
                cur.execute("DELETE FROM authors WHERE work = ?", (work_id,))
                cur.execute("DELETE FROM tags WHERE work = ?", (work_id,))
                cur.executemany(
                    "INSERT INTO authors VALUES(:author, :work)",
                    [{ "author": author, "work": work_id } for author in (metadata.author or [])],
                )
                cur.executemany(
                    "INSERT INTO tags VALUES(:tag, :work)",
                    [{ "tag": tag, "work": work_id } for tag in (metadata.genre or [])],
                )
                # Commit per work so an interrupted run keeps what it fetched.
                con.commit()
    finally:
        # Always release the connection, even if the API raises mid-loop.
        con.close()
|
||||
|
||||
def process(work_id):
    # Per-work pipeline: fetch/cache metadata, grab the thumbnail, then
    # pick and unpack the work's files, in that order.
    # NOTE(review): fetch_metadata, get_thumbnail, select_files and
    # extract_files are not defined anywhere in this file — presumably
    # planned helpers or defined in a later revision; confirm they exist
    # before calling process(), otherwise this raises NameError.
    fetch_metadata(work_id)
    get_thumbnail(work_id)
    select_files(work_id)
    extract_files(work_id)
|
||||
|
Loading…
Reference in a new issue