diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index ec567ed..3675ae0 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -24,13 +24,10 @@ DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$') FANZA_ID_REGEX = re.compile('^d_[0-9]+$') FAKKU_ID_REGEX = re.compile('.*_FAKKU$') -TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless') -ALT_VERSIONS = ['褐色', '日焼け'] - IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff'] IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store'] -IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm', '.psd'] +IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm'] def open_zipfile_with_encoding(path): try: @@ -114,6 +111,7 @@ async def fetch_async(args): } authors = dlsite_metadata.author or [] tags = dlsite_metadata.genre or [] + thumbnail_local = False thumbnail_url = dlsite_metadata.work_image if thumbnail_url.startswith('//'): thumbnail_url = 'https:' + thumbnail_url @@ -122,11 +120,17 @@ async def fetch_async(args): authors = db_row.pop('authors') tags = db_row.pop('tags') if FANZA_ID_REGEX.fullmatch(work_id): + thumbnail_local = False thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg' elif FAKKU_ID_REGEX.fullmatch(work_id): - thumbnail_url = None + thumbnail_local = True + thumbnail_path = min(work_path.iterdir()) + if not thumbnail_path.is_file(): + print(f'Fakku thumbnail path {thumbnail_path} is not a file! Skipping {work_id}') + continue else: - thumbnail_url = input('Thumbnail image URL [default: first page]: ') + thumbnail_local = False + thumbnail_url = input('Thumbnail image URL: ') cur.execute( "INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)", @@ -141,7 +145,11 @@ async def fetch_async(args): [{ "tag": tag, "work": work_id } for tag in tags], ) - if thumbnail_url: + if thumbnail_local: + ext = thumbnail_path.suffix + dest_path = thumbnails_dir / (work_id + ext) + dest_path.symlink_to(relpath(thumbnail_path, thumbnails_dir)) + else: ext = url_file_ext(thumbnail_url) dest_file = thumbnails_dir / (work_id + ext) print(f'Downloading thumbnail for {work_id} from {thumbnail_url}') @@ -194,65 +202,24 @@ def link_pdf(src, dest, start_index=0): f.write(image["image"]) def complete_prefix_number_ordering(entries): - if len(entries) == 1: - return entries - - entries_by_version = {} - for entry in entries: - version = next(ver for ver in (ALT_VERSIONS + ['']) if ver in entry.name) - entries_by_version.setdefault(version, []).append(entry) - - numberings_by_version = {ver: prefix_numbering(entries_by_version[ver]) for ver in entries_by_version} - - unified_indices = set() - for numbering in numberings_by_version.values(): - if numbering is None: - return None - unified_indices |= set(numbering.keys()) - unified_indices = list(unified_indices) - unified_indices.sort() - - if len(unified_indices) > 1 and min(unified_indices[i] - unified_indices[i-1] for i in range(1, len(unified_indices))) > 2: - return None - - versions = list(numberings_by_version.keys()) - versions.sort() - - version_lengths = {ver: len(numberings_by_version[ver]) for ver in numberings_by_version} - inner_versions = [] - outer_versions = [versions[0]] - for ver in versions[1:]: - if version_lengths[ver] >= version_lengths[versions[0]] - 2: - outer_versions.append(ver) - else: - inner_versions.append(ver) - - result = [] - for out_ver in outer_versions: - for i in unified_indices: - for ver in ([out_ver] + (inner_versions if out_ver == versions[0] else [])): - entries_i_ver = numberings_by_version[ver].get(i, []) - if len(entries_i_ver) <= 1: - result += entries_i_ver - else: - return None - return result - -def prefix_numbering(entries): matches = reversed(list(NUMBER_REGEX.finditer(entries[0].name))) for m in matches: pos = m.start() prefix = entries[0].name[:pos] if all(e.name.startswith(prefix) for e in entries): - entries_by_index = {} + entries_with_indices = [] + indices = set() for e in entries: n = NUMBER_REGEX.match(e.name[pos:]) if n is None: return None i = int(n.group()) - entries_by_index.setdefault(i, []).append(e) - return entries_by_index - + if i in indices: + return None + indices.add(i) + entries_with_indices.append((e, i)) + entries_with_indices.sort(key=lambda ei: ei[1]) + return [e for (e, i) in entries_with_indices] return None def link_ordered_files(ordering, dest, start_index=0): @@ -310,19 +277,6 @@ def collate(args): if all(entry.is_file() and entry.suffix.lower() in IMAGE_FILE_EXTENSIONS for entry in entries): ordering = complete_prefix_number_ordering(entries) - if not ordering: - with_text = [] - textless = [] - for entry in entries: - if TEXTLESS_REGEX.search(entry.name): - textless.append(entry) - else: - with_text.append(entry) - if with_text and textless: - with_text_ordering = complete_prefix_number_ordering(with_text) - textless_ordering = complete_prefix_number_ordering(textless) - if with_text_ordering and textless_ordering: - ordering = with_text_ordering + textless_ordering if ordering: print(f'Symlinking image files for {work_id}') link_ordered_files(ordering, collation_dir) @@ -421,9 +375,7 @@ def generate(args): con = sqlite3.connect(args.destdir / 'meta.db') cur = con.cursor() - site_dir = args.destdir / 'site' - - collated_work_ids = {p.name for p in (site_dir / 'images').iterdir()} + collated_work_ids = {p.name for p in (args.destdir / 'site' / 'images').iterdir()} actual_series = {series for (series,) in cur.execute('SELECT series FROM works GROUP BY series HAVING count(series) > 1')} @@ -433,16 +385,9 @@ def generate(args): continue authors = [author for (author,) in cur.execute('SELECT author FROM authors WHERE work = ?', (work_id,))] tags = [tag for (tag,) in cur.execute('SELECT tag FROM tags WHERE work = ?', (work_id,))] - - images = [path.name for path in (site_dir / 'images' / work_id).iterdir()] - images.sort() - - try: - thumbnail_path = relpath(next( - f for f in (site_dir / 'thumbnails').iterdir() if f.stem == work_id - ), site_dir) - except StopIteration: - thumbnail_path = f'images/{work_id}/{images[0]}' + thumbnail_filename = next( + f for f in (args.destdir / 'site' / 'thumbnails').iterdir() if f.stem == work_id + ).name work = { 'id': work_id, 'title': title, @@ -452,11 +397,14 @@ def generate(args): 'series': series, 'authors': authors, 'tags': tags, - 'thumbnail_path': thumbnail_path, + 'thumbnail_filename': thumbnail_filename, } works.append(work) - work_dir = site_dir / 'works' / work_id + images = [path.name for path in (args.destdir / 'site' / 'images' / work_id).iterdir()] + images.sort() + + work_dir = args.destdir / 'site' / 'works' / work_id viewer_dir = work_dir / 'view' viewer_dir.mkdir(parents=True, exist_ok=True) with open(work_dir / 'index.html', 'w') as f: @@ -465,7 +413,7 @@ def generate(args): f.write(viewer_template.render(depth=3, work=work, title=title, images=images)) def make_categorization(categorization, query, work_filter, work_style_cards=False): - categorization_dir = site_dir / categorization + categorization_dir = args.destdir / 'site' / categorization cats = [cat for (cat,) in cur.execute(query)] cat_samples = {} @@ -517,9 +465,9 @@ def generate(args): ) with resources.as_file(resources.files("dlibrary")) as r: - copy_recursive(r / 'static', site_dir / 'static') + copy_recursive(r / 'static', args.destdir / 'site' / 'static') - with open(site_dir / 'index.html', 'w') as f: + with open(args.destdir / 'site' / 'index.html', 'w') as f: f.write(index_template.render(depth=0, works=works)) con.close() diff --git a/dlibrary/static/index.js b/dlibrary/static/index.js index 50d7be7..328476d 100644 --- a/dlibrary/static/index.js +++ b/dlibrary/static/index.js @@ -48,7 +48,7 @@ document.addEventListener('DOMContentLoaded', () => { card.appendChild(link); const thumb = document.createElement('img'); - thumb.src = `${ROOT}/${work.thumbnail_path}`; + thumb.src = `${ROOT}/thumbnails/${work.thumbnail_filename}`; link.appendChild(thumb); const creators = document.createElement('div'); diff --git a/dlibrary/templates/categorization.html b/dlibrary/templates/categorization.html index 40e6817..7849819 100644 --- a/dlibrary/templates/categorization.html +++ b/dlibrary/templates/categorization.html @@ -12,7 +12,7 @@ {{ cat }} {% if samples[cat] %} - + {% endif %} diff --git a/dlibrary/templates/list.html b/dlibrary/templates/list.html index 940ca91..0e28cc5 100644 --- a/dlibrary/templates/list.html +++ b/dlibrary/templates/list.html @@ -7,7 +7,7 @@ {% for work in works %}
- +
[{% if work['circle'] %}{{ work['circle'] }}{% endif %}{% if work['circle'] and work['authors'] %} ({% endif %}{{ ', '.join(work['authors']) }}{% if work['circle'] and work['authors'] %}){% endif %}]
diff --git a/dlibrary/templates/work.html b/dlibrary/templates/work.html index 2bedfb2..cf0f6bc 100644 --- a/dlibrary/templates/work.html +++ b/dlibrary/templates/work.html @@ -5,7 +5,7 @@