optimizations and extra debug info for generate mode
This commit is contained in:
parent
a822a504e8
commit
21c6e11228
|
@ -14,6 +14,7 @@ import shutil
|
|||
import sqlite3
|
||||
import stat
|
||||
import textwrap
|
||||
import time
|
||||
import unicodedata
|
||||
from urllib.parse import urlparse
|
||||
import zipfile
|
||||
|
@ -105,7 +106,7 @@ READONLY_DIR = READONLY_FILE | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
|
|||
debug_mode = False
|
||||
def debug(s):
|
||||
if debug_mode:
|
||||
print(s)
|
||||
print(f'{time.strftime("%Y-%m-%d %H:%M:%S")} - {s}')
|
||||
|
||||
def open_zipfile_with_encoding(path):
|
||||
for enc in ["utf-8", "shift-jis", "shift-jisx0213"]:
|
||||
|
@ -1046,8 +1047,7 @@ def copy_recursive(src, dest):
|
|||
|
||||
|
||||
memoized_similarities = {}
|
||||
|
||||
def similarity(a, b):
|
||||
def similarity(a, b, cache_cur=None):
|
||||
if len(a) < len(b) or (len(a) == len(b) and a < b):
|
||||
shorter = a
|
||||
longer = b
|
||||
|
@ -1060,13 +1060,22 @@ def similarity(a, b):
|
|||
if (shorter, longer) in memoized_similarities:
|
||||
return memoized_similarities[(shorter, longer)]
|
||||
|
||||
options = [similarity(shorter[1:], longer)]
|
||||
for i in range(1, len(shorter)+1):
|
||||
match_idx = longer.find(shorter[:i])
|
||||
if match_idx == -1:
|
||||
break
|
||||
options.append(i*i + similarity(shorter[i:], longer[match_idx+i:]))
|
||||
result = max(options)
|
||||
if cache_cur and (cached := cache_cur.execute("SELECT similarity FROM similarities WHERE shorter = ? AND longer = ?", (shorter, longer)).fetchone()) is not None:
|
||||
result = cached[0]
|
||||
else:
|
||||
options = [similarity(shorter[1:], longer)]
|
||||
for i in range(1, len(shorter)+1):
|
||||
match_idx = longer.find(shorter[:i])
|
||||
if match_idx == -1:
|
||||
break
|
||||
options.append(i*i + similarity(shorter[i:], longer[match_idx+i:]))
|
||||
result = max(options)
|
||||
|
||||
if cache_cur:
|
||||
cache_cur.execute(
|
||||
"INSERT INTO similarities(shorter, longer, similarity) VALUES(?, ?, ?)",
|
||||
(shorter, longer, result),
|
||||
)
|
||||
|
||||
memoized_similarities[(shorter, longer)] = result
|
||||
return result
|
||||
|
@ -1090,6 +1099,7 @@ def top(items, n, key, overflow=0):
|
|||
return [item for (item, score) in winners[:n+overflow]]
|
||||
|
||||
def generate(args):
|
||||
debug('loading templates')
|
||||
jenv = Environment(
|
||||
loader=PackageLoader("dlibrary"),
|
||||
autoescape=select_autoescape()
|
||||
|
@ -1099,16 +1109,29 @@ def generate(args):
|
|||
categorization_template = jenv.get_template("categorization.html")
|
||||
work_template = jenv.get_template("work.html")
|
||||
index_template = jenv.get_template("index.html")
|
||||
debug('templates loaded')
|
||||
|
||||
debug('opening main database')
|
||||
con = sqlite3.connect(args.destdir / 'meta.db')
|
||||
cur = con.cursor()
|
||||
debug('main database open')
|
||||
|
||||
debug('opening cache database')
|
||||
cache_con = sqlite3.connect(args.destdir / 'cache.db')
|
||||
cache_cur = cache_con.cursor()
|
||||
cache_cur.execute("CREATE TABLE IF NOT EXISTS similarities(shorter TEXT, longer TEXT, similarity INT, PRIMARY KEY(shorter, longer))")
|
||||
debug('cache database open')
|
||||
|
||||
site_dir = args.destdir / 'site'
|
||||
|
||||
collated_work_ids = {p.name for p in (site_dir / 'images').iterdir()}
|
||||
|
||||
works = []
|
||||
for (work_id, title, circle, date, description, series) in cur.execute('SELECT id, title, circle, date, description, series FROM works ORDER BY date DESC').fetchall():
|
||||
debug('checking thumbnail files')
|
||||
thumbnail_files = {f.stem: f for f in (site_dir / 'thumbnails').iterdir()}
|
||||
debug(f'{len(thumbnail_files)} thumbnail files found')
|
||||
debug('running database query for works')
|
||||
for (idx, (work_id, title, circle, date, description, series)) in enumerate(cur.execute('SELECT id, title, circle, date, description, series FROM works ORDER BY date DESC').fetchall()):
|
||||
if work_id not in collated_work_ids:
|
||||
continue
|
||||
authors = [author for (author,) in cur.execute('SELECT author FROM authors WHERE work = ?', (work_id,))]
|
||||
|
@ -1117,12 +1140,7 @@ def generate(args):
|
|||
images = [path.name for path in (site_dir / 'images' / work_id).iterdir()]
|
||||
images.sort()
|
||||
|
||||
try:
|
||||
thumbnail_path = relpath(next(
|
||||
f for f in (site_dir / 'thumbnails').iterdir() if f.stem == work_id
|
||||
), site_dir)
|
||||
except StopIteration:
|
||||
thumbnail_path = f'images/{work_id}/{images[0]}'
|
||||
thumbnail_path = relpath(thumbnail_files.get(work_id, site_dir / 'images' / work_id / images[0]), site_dir)
|
||||
work = {
|
||||
'id': work_id,
|
||||
'title': title,
|
||||
|
@ -1137,13 +1155,16 @@ def generate(args):
|
|||
}
|
||||
works.append(work)
|
||||
|
||||
print(f'\x1b[2K\r{idx+1} database entries read...', end='')
|
||||
print()
|
||||
|
||||
for (idx, work) in enumerate(works):
|
||||
def suggestion_priority(other_work):
|
||||
if other_work is work:
|
||||
return -2
|
||||
if work['series'] and work['series'] == other_work['series']:
|
||||
return -1
|
||||
return similarity(work['title'], other_work['title'])
|
||||
return similarity(work['title'], other_work['title'], cache_cur)
|
||||
suggested = top(works, SUGGESTED_WORKS_COUNT, suggestion_priority)
|
||||
|
||||
work_dir = site_dir / 'works' / work['id']
|
||||
|
@ -1155,6 +1176,7 @@ def generate(args):
|
|||
f.write(viewer_template.render(depth=3, work=work, title=work['title']))
|
||||
|
||||
print(f'\x1b[2K\r{idx+1}/{len(works)} works processed...', end=('' if idx+1 < len(works) else '\n'))
|
||||
cache_con.commit()
|
||||
|
||||
uca = pyuca.Collator().sort_key
|
||||
def make_categorization(categorization, query, work_filter, work_style_cards=False):
|
||||
|
@ -1162,7 +1184,7 @@ def generate(args):
|
|||
|
||||
cats = sorted((cat for (cat,) in cur.execute(query)), key=uca)
|
||||
cat_samples = {}
|
||||
for cat in cats:
|
||||
for (idx, cat) in enumerate(cats):
|
||||
cat_works = list(filter(work_filter(cat), works))
|
||||
cat_samples[cat] = cat_works[0] if len(cat_works) > 0 else None
|
||||
|
||||
|
@ -1176,6 +1198,7 @@ def generate(args):
|
|||
title=cat,
|
||||
categorization=categorization,
|
||||
))
|
||||
print(f'\x1b[2K\r{idx+1}/{len(cats)} {categorization} processed...', end=('' if idx+1 < len(cats) else '\n'))
|
||||
|
||||
categorization_dir.mkdir(parents=True, exist_ok=True)
|
||||
with open(categorization_dir / 'index.html', 'w') as f:
|
||||
|
@ -1209,13 +1232,23 @@ def generate(args):
|
|||
work_style_cards=True,
|
||||
)
|
||||
|
||||
debug('copying static files')
|
||||
with resources.as_file(resources.files("dlibrary")) as r:
|
||||
copy_recursive(r / 'static', site_dir / 'static')
|
||||
debug('static files copied')
|
||||
|
||||
debug('writing index page')
|
||||
with open(site_dir / 'index.html', 'w') as f:
|
||||
f.write(index_template.render(depth=0, works=works))
|
||||
debug('index page written')
|
||||
|
||||
debug('closing cache database')
|
||||
cache_con.close()
|
||||
debug('cache database closed')
|
||||
|
||||
debug('closing main database')
|
||||
con.close()
|
||||
debug('main database closed')
|
||||
|
||||
|
||||
argparser = argparse.ArgumentParser(
|
||||
|
|
Loading…
Reference in a new issue