optimizations and extra debug info for generate mode
This commit is contained in:
parent
a822a504e8
commit
21c6e11228
|
@ -14,6 +14,7 @@ import shutil
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import stat
|
import stat
|
||||||
import textwrap
|
import textwrap
|
||||||
|
import time
|
||||||
import unicodedata
|
import unicodedata
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
import zipfile
|
import zipfile
|
||||||
|
@ -105,7 +106,7 @@ READONLY_DIR = READONLY_FILE | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
|
||||||
debug_mode = False
|
debug_mode = False
|
||||||
def debug(s):
|
def debug(s):
|
||||||
if debug_mode:
|
if debug_mode:
|
||||||
print(s)
|
print(f'{time.strftime("%Y-%m-%d %H:%M:%S")} - {s}')
|
||||||
|
|
||||||
def open_zipfile_with_encoding(path):
|
def open_zipfile_with_encoding(path):
|
||||||
for enc in ["utf-8", "shift-jis", "shift-jisx0213"]:
|
for enc in ["utf-8", "shift-jis", "shift-jisx0213"]:
|
||||||
|
@ -1046,8 +1047,7 @@ def copy_recursive(src, dest):
|
||||||
|
|
||||||
|
|
||||||
memoized_similarities = {}
|
memoized_similarities = {}
|
||||||
|
def similarity(a, b, cache_cur=None):
|
||||||
def similarity(a, b):
|
|
||||||
if len(a) < len(b) or (len(a) == len(b) and a < b):
|
if len(a) < len(b) or (len(a) == len(b) and a < b):
|
||||||
shorter = a
|
shorter = a
|
||||||
longer = b
|
longer = b
|
||||||
|
@ -1060,13 +1060,22 @@ def similarity(a, b):
|
||||||
if (shorter, longer) in memoized_similarities:
|
if (shorter, longer) in memoized_similarities:
|
||||||
return memoized_similarities[(shorter, longer)]
|
return memoized_similarities[(shorter, longer)]
|
||||||
|
|
||||||
options = [similarity(shorter[1:], longer)]
|
if cache_cur and (cached := cache_cur.execute("SELECT similarity FROM similarities WHERE shorter = ? AND longer = ?", (shorter, longer)).fetchone()) is not None:
|
||||||
for i in range(1, len(shorter)+1):
|
result = cached[0]
|
||||||
match_idx = longer.find(shorter[:i])
|
else:
|
||||||
if match_idx == -1:
|
options = [similarity(shorter[1:], longer)]
|
||||||
break
|
for i in range(1, len(shorter)+1):
|
||||||
options.append(i*i + similarity(shorter[i:], longer[match_idx+i:]))
|
match_idx = longer.find(shorter[:i])
|
||||||
result = max(options)
|
if match_idx == -1:
|
||||||
|
break
|
||||||
|
options.append(i*i + similarity(shorter[i:], longer[match_idx+i:]))
|
||||||
|
result = max(options)
|
||||||
|
|
||||||
|
if cache_cur:
|
||||||
|
cache_cur.execute(
|
||||||
|
"INSERT INTO similarities(shorter, longer, similarity) VALUES(?, ?, ?)",
|
||||||
|
(shorter, longer, result),
|
||||||
|
)
|
||||||
|
|
||||||
memoized_similarities[(shorter, longer)] = result
|
memoized_similarities[(shorter, longer)] = result
|
||||||
return result
|
return result
|
||||||
|
@ -1090,6 +1099,7 @@ def top(items, n, key, overflow=0):
|
||||||
return [item for (item, score) in winners[:n+overflow]]
|
return [item for (item, score) in winners[:n+overflow]]
|
||||||
|
|
||||||
def generate(args):
|
def generate(args):
|
||||||
|
debug('loading templates')
|
||||||
jenv = Environment(
|
jenv = Environment(
|
||||||
loader=PackageLoader("dlibrary"),
|
loader=PackageLoader("dlibrary"),
|
||||||
autoescape=select_autoescape()
|
autoescape=select_autoescape()
|
||||||
|
@ -1099,16 +1109,29 @@ def generate(args):
|
||||||
categorization_template = jenv.get_template("categorization.html")
|
categorization_template = jenv.get_template("categorization.html")
|
||||||
work_template = jenv.get_template("work.html")
|
work_template = jenv.get_template("work.html")
|
||||||
index_template = jenv.get_template("index.html")
|
index_template = jenv.get_template("index.html")
|
||||||
|
debug('templates loaded')
|
||||||
|
|
||||||
|
debug('opening main database')
|
||||||
con = sqlite3.connect(args.destdir / 'meta.db')
|
con = sqlite3.connect(args.destdir / 'meta.db')
|
||||||
cur = con.cursor()
|
cur = con.cursor()
|
||||||
|
debug('main database open')
|
||||||
|
|
||||||
|
debug('opening cache database')
|
||||||
|
cache_con = sqlite3.connect(args.destdir / 'cache.db')
|
||||||
|
cache_cur = cache_con.cursor()
|
||||||
|
cache_cur.execute("CREATE TABLE IF NOT EXISTS similarities(shorter TEXT, longer TEXT, similarity INT, PRIMARY KEY(shorter, longer))")
|
||||||
|
debug('cache database open')
|
||||||
|
|
||||||
site_dir = args.destdir / 'site'
|
site_dir = args.destdir / 'site'
|
||||||
|
|
||||||
collated_work_ids = {p.name for p in (site_dir / 'images').iterdir()}
|
collated_work_ids = {p.name for p in (site_dir / 'images').iterdir()}
|
||||||
|
|
||||||
works = []
|
works = []
|
||||||
for (work_id, title, circle, date, description, series) in cur.execute('SELECT id, title, circle, date, description, series FROM works ORDER BY date DESC').fetchall():
|
debug('checking thumbnail files')
|
||||||
|
thumbnail_files = {f.stem: f for f in (site_dir / 'thumbnails').iterdir()}
|
||||||
|
debug(f'{len(thumbnail_files)} thumbnail files found')
|
||||||
|
debug('running database query for works')
|
||||||
|
for (idx, (work_id, title, circle, date, description, series)) in enumerate(cur.execute('SELECT id, title, circle, date, description, series FROM works ORDER BY date DESC').fetchall()):
|
||||||
if work_id not in collated_work_ids:
|
if work_id not in collated_work_ids:
|
||||||
continue
|
continue
|
||||||
authors = [author for (author,) in cur.execute('SELECT author FROM authors WHERE work = ?', (work_id,))]
|
authors = [author for (author,) in cur.execute('SELECT author FROM authors WHERE work = ?', (work_id,))]
|
||||||
|
@ -1117,12 +1140,7 @@ def generate(args):
|
||||||
images = [path.name for path in (site_dir / 'images' / work_id).iterdir()]
|
images = [path.name for path in (site_dir / 'images' / work_id).iterdir()]
|
||||||
images.sort()
|
images.sort()
|
||||||
|
|
||||||
try:
|
thumbnail_path = relpath(thumbnail_files.get(work_id, site_dir / 'images' / work_id / images[0]), site_dir)
|
||||||
thumbnail_path = relpath(next(
|
|
||||||
f for f in (site_dir / 'thumbnails').iterdir() if f.stem == work_id
|
|
||||||
), site_dir)
|
|
||||||
except StopIteration:
|
|
||||||
thumbnail_path = f'images/{work_id}/{images[0]}'
|
|
||||||
work = {
|
work = {
|
||||||
'id': work_id,
|
'id': work_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -1137,13 +1155,16 @@ def generate(args):
|
||||||
}
|
}
|
||||||
works.append(work)
|
works.append(work)
|
||||||
|
|
||||||
|
print(f'\x1b[2K\r{idx+1} database entries read...', end='')
|
||||||
|
print()
|
||||||
|
|
||||||
for (idx, work) in enumerate(works):
|
for (idx, work) in enumerate(works):
|
||||||
def suggestion_priority(other_work):
|
def suggestion_priority(other_work):
|
||||||
if other_work is work:
|
if other_work is work:
|
||||||
return -2
|
return -2
|
||||||
if work['series'] and work['series'] == other_work['series']:
|
if work['series'] and work['series'] == other_work['series']:
|
||||||
return -1
|
return -1
|
||||||
return similarity(work['title'], other_work['title'])
|
return similarity(work['title'], other_work['title'], cache_cur)
|
||||||
suggested = top(works, SUGGESTED_WORKS_COUNT, suggestion_priority)
|
suggested = top(works, SUGGESTED_WORKS_COUNT, suggestion_priority)
|
||||||
|
|
||||||
work_dir = site_dir / 'works' / work['id']
|
work_dir = site_dir / 'works' / work['id']
|
||||||
|
@ -1155,6 +1176,7 @@ def generate(args):
|
||||||
f.write(viewer_template.render(depth=3, work=work, title=work['title']))
|
f.write(viewer_template.render(depth=3, work=work, title=work['title']))
|
||||||
|
|
||||||
print(f'\x1b[2K\r{idx+1}/{len(works)} works processed...', end=('' if idx+1 < len(works) else '\n'))
|
print(f'\x1b[2K\r{idx+1}/{len(works)} works processed...', end=('' if idx+1 < len(works) else '\n'))
|
||||||
|
cache_con.commit()
|
||||||
|
|
||||||
uca = pyuca.Collator().sort_key
|
uca = pyuca.Collator().sort_key
|
||||||
def make_categorization(categorization, query, work_filter, work_style_cards=False):
|
def make_categorization(categorization, query, work_filter, work_style_cards=False):
|
||||||
|
@ -1162,7 +1184,7 @@ def generate(args):
|
||||||
|
|
||||||
cats = sorted((cat for (cat,) in cur.execute(query)), key=uca)
|
cats = sorted((cat for (cat,) in cur.execute(query)), key=uca)
|
||||||
cat_samples = {}
|
cat_samples = {}
|
||||||
for cat in cats:
|
for (idx, cat) in enumerate(cats):
|
||||||
cat_works = list(filter(work_filter(cat), works))
|
cat_works = list(filter(work_filter(cat), works))
|
||||||
cat_samples[cat] = cat_works[0] if len(cat_works) > 0 else None
|
cat_samples[cat] = cat_works[0] if len(cat_works) > 0 else None
|
||||||
|
|
||||||
|
@ -1176,6 +1198,7 @@ def generate(args):
|
||||||
title=cat,
|
title=cat,
|
||||||
categorization=categorization,
|
categorization=categorization,
|
||||||
))
|
))
|
||||||
|
print(f'\x1b[2K\r{idx+1}/{len(cats)} {categorization} processed...', end=('' if idx+1 < len(cats) else '\n'))
|
||||||
|
|
||||||
categorization_dir.mkdir(parents=True, exist_ok=True)
|
categorization_dir.mkdir(parents=True, exist_ok=True)
|
||||||
with open(categorization_dir / 'index.html', 'w') as f:
|
with open(categorization_dir / 'index.html', 'w') as f:
|
||||||
|
@ -1209,13 +1232,23 @@ def generate(args):
|
||||||
work_style_cards=True,
|
work_style_cards=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
debug('copying static files')
|
||||||
with resources.as_file(resources.files("dlibrary")) as r:
|
with resources.as_file(resources.files("dlibrary")) as r:
|
||||||
copy_recursive(r / 'static', site_dir / 'static')
|
copy_recursive(r / 'static', site_dir / 'static')
|
||||||
|
debug('static files copied')
|
||||||
|
|
||||||
|
debug('writing index page')
|
||||||
with open(site_dir / 'index.html', 'w') as f:
|
with open(site_dir / 'index.html', 'w') as f:
|
||||||
f.write(index_template.render(depth=0, works=works))
|
f.write(index_template.render(depth=0, works=works))
|
||||||
|
debug('index page written')
|
||||||
|
|
||||||
|
debug('closing cache database')
|
||||||
|
cache_con.close()
|
||||||
|
debug('cache database closed')
|
||||||
|
|
||||||
|
debug('closing main database')
|
||||||
con.close()
|
con.close()
|
||||||
|
debug('main database closed')
|
||||||
|
|
||||||
|
|
||||||
argparser = argparse.ArgumentParser(
|
argparser = argparse.ArgumentParser(
|
||||||
|
|
Loading…
Reference in a new issue