optimizations and extra debug info for generate mode
This commit is contained in:
		
							parent
							
								
									a822a504e8
								
							
						
					
					
						commit
						21c6e11228
					
				
					 1 changed file with 52 additions and 19 deletions
				
			
		|  | @ -14,6 +14,7 @@ import shutil | |||
| import sqlite3 | ||||
| import stat | ||||
| import textwrap | ||||
| import time | ||||
| import unicodedata | ||||
| from urllib.parse import urlparse | ||||
| import zipfile | ||||
|  | @ -105,7 +106,7 @@ READONLY_DIR = READONLY_FILE | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH | |||
# Global switch: when False, debug() is a no-op.
debug_mode = False


def debug(s):
    """Print *s* prefixed with a wall-clock timestamp, if debug mode is on.

    Does nothing (and prints nothing) while ``debug_mode`` is False.
    """
    if not debug_mode:
        return
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    print(f"{timestamp} - {s}")
| 
 | ||||
| def open_zipfile_with_encoding(path): | ||||
|     for enc in ["utf-8", "shift-jis", "shift-jisx0213"]: | ||||
|  | @ -1046,8 +1047,7 @@ def copy_recursive(src, dest): | |||
| 
 | ||||
| 
 | ||||
| memoized_similarities = {} | ||||
| 
 | ||||
| def similarity(a, b): | ||||
| def similarity(a, b, cache_cur=None): | ||||
|     if len(a) < len(b) or (len(a) == len(b) and a < b): | ||||
|         shorter = a | ||||
|         longer = b | ||||
|  | @ -1060,13 +1060,22 @@ def similarity(a, b): | |||
|     if (shorter, longer) in memoized_similarities: | ||||
|         return memoized_similarities[(shorter, longer)] | ||||
| 
 | ||||
|     options = [similarity(shorter[1:], longer)] | ||||
|     for i in range(1, len(shorter)+1): | ||||
|         match_idx = longer.find(shorter[:i]) | ||||
|         if match_idx == -1: | ||||
|             break | ||||
|         options.append(i*i + similarity(shorter[i:], longer[match_idx+i:])) | ||||
|     result = max(options) | ||||
|     if cache_cur and (cached := cache_cur.execute("SELECT similarity FROM similarities WHERE shorter = ? AND longer = ?", (shorter, longer)).fetchone()) is not None: | ||||
|         result = cached[0] | ||||
|     else: | ||||
|         options = [similarity(shorter[1:], longer)] | ||||
|         for i in range(1, len(shorter)+1): | ||||
|             match_idx = longer.find(shorter[:i]) | ||||
|             if match_idx == -1: | ||||
|                 break | ||||
|             options.append(i*i + similarity(shorter[i:], longer[match_idx+i:])) | ||||
|         result = max(options) | ||||
| 
 | ||||
|         if cache_cur: | ||||
|             cache_cur.execute( | ||||
|                 "INSERT INTO similarities(shorter, longer, similarity) VALUES(?, ?, ?)", | ||||
|                 (shorter, longer, result), | ||||
|             ) | ||||
| 
 | ||||
|     memoized_similarities[(shorter, longer)] = result | ||||
|     return result | ||||
|  | @ -1090,6 +1099,7 @@ def top(items, n, key, overflow=0): | |||
|     return [item for (item, score) in winners[:n+overflow]] | ||||
| 
 | ||||
| def generate(args): | ||||
|     debug('loading templates') | ||||
|     jenv = Environment( | ||||
|         loader=PackageLoader("dlibrary"), | ||||
|         autoescape=select_autoescape() | ||||
|  | @ -1099,16 +1109,29 @@ def generate(args): | |||
|     categorization_template = jenv.get_template("categorization.html") | ||||
|     work_template = jenv.get_template("work.html") | ||||
|     index_template = jenv.get_template("index.html") | ||||
|     debug('templates loaded') | ||||
| 
 | ||||
|     debug('opening main database') | ||||
|     con = sqlite3.connect(args.destdir / 'meta.db') | ||||
|     cur = con.cursor() | ||||
|     debug('main database open') | ||||
| 
 | ||||
|     debug('opening cache database') | ||||
|     cache_con = sqlite3.connect(args.destdir / 'cache.db') | ||||
|     cache_cur = cache_con.cursor() | ||||
|     cache_cur.execute("CREATE TABLE IF NOT EXISTS similarities(shorter TEXT, longer TEXT, similarity INT, PRIMARY KEY(shorter, longer))") | ||||
|     debug('cache database open') | ||||
| 
 | ||||
|     site_dir = args.destdir / 'site' | ||||
| 
 | ||||
|     collated_work_ids = {p.name for p in (site_dir / 'images').iterdir()} | ||||
| 
 | ||||
|     works = [] | ||||
|     for (work_id, title, circle, date, description, series) in cur.execute('SELECT id, title, circle, date, description, series FROM works ORDER BY date DESC').fetchall(): | ||||
|     debug('checking thumbnail files') | ||||
|     thumbnail_files = {f.stem: f for f in (site_dir / 'thumbnails').iterdir()} | ||||
|     debug(f'{len(thumbnail_files)} thumbnail files found') | ||||
|     debug('running database query for works') | ||||
|     for (idx, (work_id, title, circle, date, description, series)) in enumerate(cur.execute('SELECT id, title, circle, date, description, series FROM works ORDER BY date DESC').fetchall()): | ||||
|         if work_id not in collated_work_ids: | ||||
|             continue | ||||
|         authors = [author for (author,) in cur.execute('SELECT author FROM authors WHERE work = ?', (work_id,))] | ||||
|  | @ -1117,12 +1140,7 @@ def generate(args): | |||
|         images = [path.name for path in (site_dir / 'images' / work_id).iterdir()] | ||||
|         images.sort() | ||||
| 
 | ||||
|         try: | ||||
|             thumbnail_path = relpath(next( | ||||
|                 f for f in (site_dir / 'thumbnails').iterdir() if f.stem == work_id | ||||
|             ), site_dir) | ||||
|         except StopIteration: | ||||
|             thumbnail_path = f'images/{work_id}/{images[0]}' | ||||
|         thumbnail_path = relpath(thumbnail_files.get(work_id, site_dir / 'images' / work_id / images[0]), site_dir) | ||||
|         work = { | ||||
|             'id': work_id, | ||||
|             'title': title, | ||||
|  | @ -1137,13 +1155,16 @@ def generate(args): | |||
|         } | ||||
|         works.append(work) | ||||
| 
 | ||||
|         print(f'\x1b[2K\r{idx+1} database entries read...', end='') | ||||
|     print() | ||||
| 
 | ||||
|     for (idx, work) in enumerate(works): | ||||
|         def suggestion_priority(other_work): | ||||
|             if other_work is work: | ||||
|                 return -2 | ||||
|             if work['series'] and work['series'] == other_work['series']: | ||||
|                 return -1 | ||||
|             return similarity(work['title'], other_work['title']) | ||||
|             return similarity(work['title'], other_work['title'], cache_cur) | ||||
|         suggested = top(works, SUGGESTED_WORKS_COUNT, suggestion_priority) | ||||
| 
 | ||||
|         work_dir = site_dir / 'works' / work['id'] | ||||
|  | @ -1155,6 +1176,7 @@ def generate(args): | |||
|             f.write(viewer_template.render(depth=3, work=work, title=work['title'])) | ||||
| 
 | ||||
|         print(f'\x1b[2K\r{idx+1}/{len(works)} works processed...', end=('' if idx+1 < len(works) else '\n')) | ||||
|     cache_con.commit() | ||||
| 
 | ||||
|     uca = pyuca.Collator().sort_key | ||||
|     def make_categorization(categorization, query, work_filter, work_style_cards=False): | ||||
|  | @ -1162,7 +1184,7 @@ def generate(args): | |||
| 
 | ||||
|         cats = sorted((cat for (cat,) in cur.execute(query)), key=uca) | ||||
|         cat_samples = {} | ||||
|         for cat in cats: | ||||
|         for (idx, cat) in enumerate(cats): | ||||
|             cat_works = list(filter(work_filter(cat), works)) | ||||
|             cat_samples[cat] = cat_works[0] if len(cat_works) > 0 else None | ||||
| 
 | ||||
|  | @ -1176,6 +1198,7 @@ def generate(args): | |||
|                     title=cat, | ||||
|                     categorization=categorization, | ||||
|                 )) | ||||
|             print(f'\x1b[2K\r{idx+1}/{len(cats)} {categorization} processed...', end=('' if idx+1 < len(cats) else '\n')) | ||||
| 
 | ||||
|         categorization_dir.mkdir(parents=True, exist_ok=True) | ||||
|         with open(categorization_dir / 'index.html', 'w') as f: | ||||
|  | @ -1209,13 +1232,23 @@ def generate(args): | |||
|         work_style_cards=True, | ||||
|     ) | ||||
| 
 | ||||
|     debug('copying static files') | ||||
|     with resources.as_file(resources.files("dlibrary")) as r: | ||||
|         copy_recursive(r / 'static', site_dir / 'static') | ||||
|     debug('static files copied') | ||||
| 
 | ||||
|     debug('writing index page') | ||||
|     with open(site_dir / 'index.html', 'w') as f: | ||||
|         f.write(index_template.render(depth=0, works=works)) | ||||
|     debug('index page written') | ||||
| 
 | ||||
|     debug('closing cache database') | ||||
|     cache_con.close() | ||||
|     debug('cache database closed') | ||||
| 
 | ||||
|     debug('closing main database') | ||||
|     con.close() | ||||
|     debug('main database closed') | ||||
| 
 | ||||
| 
 | ||||
| argparser = argparse.ArgumentParser( | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue