diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index 3675ae0..ec567ed 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -24,10 +24,13 @@ DLSITE_ID_REGEX = re.compile('^[BR]J[0-9]+$')
FANZA_ID_REGEX = re.compile('^d_[0-9]+$')
FAKKU_ID_REGEX = re.compile('.*_FAKKU$')
+TEXTLESS_REGEX = re.compile('(台詞|セリフ)(な|無)し|notext|textless')
+ALT_VERSIONS = ['褐色', '日焼け']
+
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
-IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm']
+IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm', '.psd']
def open_zipfile_with_encoding(path):
try:
@@ -111,7 +114,6 @@ async def fetch_async(args):
}
authors = dlsite_metadata.author or []
tags = dlsite_metadata.genre or []
- thumbnail_local = False
thumbnail_url = dlsite_metadata.work_image
if thumbnail_url.startswith('//'):
thumbnail_url = 'https:' + thumbnail_url
@@ -120,17 +122,11 @@ async def fetch_async(args):
authors = db_row.pop('authors')
tags = db_row.pop('tags')
if FANZA_ID_REGEX.fullmatch(work_id):
- thumbnail_local = False
thumbnail_url = f'https://doujin-assets.dmm.co.jp/digital/comic/{work_id}/{work_id}pl.jpg'
elif FAKKU_ID_REGEX.fullmatch(work_id):
- thumbnail_local = True
- thumbnail_path = min(work_path.iterdir())
- if not thumbnail_path.is_file():
- print(f'Fakku thumbnail path {thumbnail_path} is not a file! Skipping {work_id}')
- continue
+ thumbnail_url = None
else:
- thumbnail_local = False
- thumbnail_url = input('Thumbnail image URL: ')
+ thumbnail_url = input('Thumbnail image URL [default: first page]: ')
cur.execute(
"INSERT INTO works(id, title, circle, date, description, series) VALUES(:id, :title, :circle, :date, :description, :series)",
@@ -145,11 +141,7 @@ async def fetch_async(args):
[{ "tag": tag, "work": work_id } for tag in tags],
)
- if thumbnail_local:
- ext = thumbnail_path.suffix
- dest_path = thumbnails_dir / (work_id + ext)
- dest_path.symlink_to(relpath(thumbnail_path, thumbnails_dir))
- else:
+ if thumbnail_url:
ext = url_file_ext(thumbnail_url)
dest_file = thumbnails_dir / (work_id + ext)
print(f'Downloading thumbnail for {work_id} from {thumbnail_url}')
@@ -202,24 +194,65 @@ def link_pdf(src, dest, start_index=0):
f.write(image["image"])
def complete_prefix_number_ordering(entries):
+ if len(entries) == 1:
+ return entries
+
+ entries_by_version = {}
+ for entry in entries:
+ version = next(ver for ver in (ALT_VERSIONS + ['']) if ver in entry.name)
+ entries_by_version.setdefault(version, []).append(entry)
+
+ numberings_by_version = {ver: prefix_numbering(entries_by_version[ver]) for ver in entries_by_version}
+
+ unified_indices = set()
+ for numbering in numberings_by_version.values():
+ if numbering is None:
+ return None
+ unified_indices |= set(numbering.keys())
+ unified_indices = list(unified_indices)
+ unified_indices.sort()
+
+ if len(unified_indices) > 1 and min(unified_indices[i] - unified_indices[i-1] for i in range(1, len(unified_indices))) > 2:
+ return None
+
+ versions = list(numberings_by_version.keys())
+ versions.sort()
+
+ version_lengths = {ver: len(numberings_by_version[ver]) for ver in numberings_by_version}
+ inner_versions = []
+ outer_versions = [versions[0]]
+ for ver in versions[1:]:
+ if version_lengths[ver] >= version_lengths[versions[0]] - 2:
+ outer_versions.append(ver)
+ else:
+ inner_versions.append(ver)
+
+ result = []
+ for out_ver in outer_versions:
+ for i in unified_indices:
+ for ver in ([out_ver] + (inner_versions if out_ver == versions[0] else [])):
+ entries_i_ver = numberings_by_version[ver].get(i, [])
+ if len(entries_i_ver) <= 1:
+ result += entries_i_ver
+ else:
+ return None
+ return result
+
+def prefix_numbering(entries):
matches = reversed(list(NUMBER_REGEX.finditer(entries[0].name)))
for m in matches:
pos = m.start()
prefix = entries[0].name[:pos]
if all(e.name.startswith(prefix) for e in entries):
- entries_with_indices = []
- indices = set()
+ entries_by_index = {}
for e in entries:
n = NUMBER_REGEX.match(e.name[pos:])
if n is None:
return None
i = int(n.group())
- if i in indices:
- return None
- indices.add(i)
- entries_with_indices.append((e, i))
- entries_with_indices.sort(key=lambda ei: ei[1])
- return [e for (e, i) in entries_with_indices]
+ entries_by_index.setdefault(i, []).append(e)
+ return entries_by_index
+
return None
def link_ordered_files(ordering, dest, start_index=0):
@@ -277,6 +310,19 @@ def collate(args):
if all(entry.is_file() and entry.suffix.lower() in IMAGE_FILE_EXTENSIONS for entry in entries):
ordering = complete_prefix_number_ordering(entries)
+ if not ordering:
+ with_text = []
+ textless = []
+ for entry in entries:
+ if TEXTLESS_REGEX.search(entry.name):
+ textless.append(entry)
+ else:
+ with_text.append(entry)
+ if with_text and textless:
+ with_text_ordering = complete_prefix_number_ordering(with_text)
+ textless_ordering = complete_prefix_number_ordering(textless)
+ if with_text_ordering and textless_ordering:
+ ordering = with_text_ordering + textless_ordering
if ordering:
print(f'Symlinking image files for {work_id}')
link_ordered_files(ordering, collation_dir)
@@ -375,7 +421,9 @@ def generate(args):
con = sqlite3.connect(args.destdir / 'meta.db')
cur = con.cursor()
- collated_work_ids = {p.name for p in (args.destdir / 'site' / 'images').iterdir()}
+ site_dir = args.destdir / 'site'
+
+ collated_work_ids = {p.name for p in (site_dir / 'images').iterdir()}
actual_series = {series for (series,) in cur.execute('SELECT series FROM works GROUP BY series HAVING count(series) > 1')}
@@ -385,9 +433,16 @@ def generate(args):
continue
authors = [author for (author,) in cur.execute('SELECT author FROM authors WHERE work = ?', (work_id,))]
tags = [tag for (tag,) in cur.execute('SELECT tag FROM tags WHERE work = ?', (work_id,))]
- thumbnail_filename = next(
- f for f in (args.destdir / 'site' / 'thumbnails').iterdir() if f.stem == work_id
- ).name
+
+ images = [path.name for path in (site_dir / 'images' / work_id).iterdir()]
+ images.sort()
+
+ try:
+ thumbnail_path = relpath(next(
+ f for f in (site_dir / 'thumbnails').iterdir() if f.stem == work_id
+ ), site_dir)
+ except StopIteration:
+ thumbnail_path = f'images/{work_id}/{images[0]}'
work = {
'id': work_id,
'title': title,
@@ -397,14 +452,11 @@ def generate(args):
'series': series,
'authors': authors,
'tags': tags,
- 'thumbnail_filename': thumbnail_filename,
+ 'thumbnail_path': thumbnail_path,
}
works.append(work)
- images = [path.name for path in (args.destdir / 'site' / 'images' / work_id).iterdir()]
- images.sort()
-
- work_dir = args.destdir / 'site' / 'works' / work_id
+ work_dir = site_dir / 'works' / work_id
viewer_dir = work_dir / 'view'
viewer_dir.mkdir(parents=True, exist_ok=True)
with open(work_dir / 'index.html', 'w') as f:
@@ -413,7 +465,7 @@ def generate(args):
f.write(viewer_template.render(depth=3, work=work, title=title, images=images))
def make_categorization(categorization, query, work_filter, work_style_cards=False):
- categorization_dir = args.destdir / 'site' / categorization
+ categorization_dir = site_dir / categorization
cats = [cat for (cat,) in cur.execute(query)]
cat_samples = {}
@@ -465,9 +517,9 @@ def generate(args):
)
with resources.as_file(resources.files("dlibrary")) as r:
- copy_recursive(r / 'static', args.destdir / 'site' / 'static')
+ copy_recursive(r / 'static', site_dir / 'static')
- with open(args.destdir / 'site' / 'index.html', 'w') as f:
+ with open(site_dir / 'index.html', 'w') as f:
f.write(index_template.render(depth=0, works=works))
con.close()
diff --git a/dlibrary/static/index.js b/dlibrary/static/index.js
index 328476d..50d7be7 100644
--- a/dlibrary/static/index.js
+++ b/dlibrary/static/index.js
@@ -48,7 +48,7 @@ document.addEventListener('DOMContentLoaded', () => {
card.appendChild(link);
const thumb = document.createElement('img');
- thumb.src = `${ROOT}/thumbnails/${work.thumbnail_filename}`;
+ thumb.src = `${ROOT}/${work.thumbnail_path}`;
link.appendChild(thumb);
const creators = document.createElement('div');
diff --git a/dlibrary/templates/categorization.html b/dlibrary/templates/categorization.html
index 7849819..40e6817 100644
--- a/dlibrary/templates/categorization.html
+++ b/dlibrary/templates/categorization.html
@@ -12,7 +12,7 @@
{{ cat }}
{% if samples[cat] %}
-
+
{% endif %}
diff --git a/dlibrary/templates/list.html b/dlibrary/templates/list.html
index 0e28cc5..940ca91 100644
--- a/dlibrary/templates/list.html
+++ b/dlibrary/templates/list.html
@@ -7,7 +7,7 @@
{% for work in works %}