diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index 3d313b5..5e2ddb8 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -4,7 +4,7 @@ import argparse
 import asyncio
 import importlib_resources as resources
 from pathlib import Path
-import os
+from os import getenv
 from os.path import relpath, splitext
 import re
 import shutil
@@ -15,7 +15,6 @@ import zipfile
 
 from dlsite_async import DlsiteAPI
 import fitz
-from PIL import Image
 from jinja2 import Environment, PackageLoader, select_autoescape
 import requests
 
@@ -44,7 +43,7 @@ ALT_VERSIONS = [
 
 IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
 IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
-IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm', '.psd', '.mp4']
+IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm', '.psd']
 
 def open_zipfile_with_encoding(path):
     try:
@@ -326,25 +325,12 @@ def link_ordered_files(ordering, dest, start_index):
         link_path = dest / f'{idx:04d}{ext}'
         link_path.symlink_to(relpath(src_path, dest))
 
-def ignoreable(path):
-    return path.name in IGNOREABLE_FILES or path.suffix.lower() in IGNOREABLE_EXTENSIONS
-
 def ls_ignore(directory):
     return [
         path for path in directory.iterdir()
-        if not ignoreable(path)
+        if path.name not in IGNOREABLE_FILES and path.suffix.lower() not in IGNOREABLE_EXTENSIONS
     ]
 
-def descendant_files_ignore(directory):
-    result = []
-    for item in ls_ignore(directory):
-        if item.is_dir():
-            result.extend(descendant_files_ignore(item))
-        else:
-            result.append(item)
-
-    return result
-
 def collate(args):
     con = sqlite3.connect(args.destdir / 'meta.db')
     cur = con.cursor()
@@ -406,29 +392,6 @@ def collate_regex_later(srcs, dest, regex, start_index):
         return None
     return nonmatching_pages + matching_pages
 
-def standalone_image_size(filepath):
-    with Image.open(filepath) as im:
-        return im.size
-
-def pdf_image_sizes(filepath):
-    sizes_by_xref = {}
-
-    with fitz.open(filepath) as pdf:
-        for page in pdf:
-            for (xref, _, width, height, *_) in page.get_images():
-                if xref in sizes_by_xref:
-                    continue
-                sizes_by_xref[xref] = (width, height)
-
-    return list(sizes_by_xref.values())
-
-def median(items):
-    if len(items) == 0:
-        return None
-
-    items.sort()
-    return items[len(items) // 2]
-
 def collate_from_paths(srcs, dest, start_index):
     if len(srcs) == 1 and srcs[0].is_dir():
         return collate_from_paths(ls_ignore(srcs[0]), dest, start_index)
@@ -457,34 +420,6 @@ def collate_from_paths(srcs, dest, start_index):
         else:
             return None
 
-    pdfs = [src for src in srcs if src.is_file() and src.suffix.lower() == '.pdf']
-    if len(pdfs) == 1:
-        pdf = pdfs[0]
-        images = []
-        non_images = []
-        descendant_files = [
-            src for src in srcs if src != pdf and src.is_file()
-        ] + [
-            f for src in srcs if src.is_dir() for f in descendant_files_ignore(src)
-        ]
-        for f in descendant_files:
-            if f.suffix.lower() in IMAGE_FILE_EXTENSIONS:
-                images.append(f)
-            else:
-                non_images.append(f)
-                break
-        if len(non_images) == 0 and len(images) > 0:
-            pdf_sizes = pdf_image_sizes(pdf)
-            standalone_sizes = [standalone_image_size(f) for f in images]
-            if abs(len(pdf_sizes) - len(standalone_sizes)) <= 2:
-                median_pdf_size = median(pdf_sizes)
-                median_standalone_size = median(standalone_sizes)
-                if median_pdf_size and median_standalone_size:
-                    if median_standalone_size[0] >= median_pdf_size[0] and median_standalone_size[1] >= median_pdf_size[1]:
-                        return collate_from_paths([src for src in srcs if src != pdf], dest, start_index)
-                    if median_pdf_size[0] >= median_standalone_size[0] and median_pdf_size[1] >= median_standalone_size[1]:
-                        return collate_from_paths([pdf], dest, start_index)
-
     return None
 
 def self_and_parents(path):
@@ -527,28 +462,6 @@ def manual_collate(args):
             print(f'Unknown file type {path}, stopping')
             return
 
-def fmt_size(s):
-    return f'{s[0]}x{s[1]}px'
-
-def analyze(args):
-    extract_dir = args.destdir / 'extract'
-    files = descendant_files_ignore(extract_dir / args.work_id)
-    files.sort()
-
-    for f in files:
-        print(f'{relpath(f, extract_dir)}', end='')
-        if f.suffix.lower() in IMAGE_FILE_EXTENSIONS:
-            size = standalone_image_size(f)
-            print(f'\t{fmt_size(size)}')
-        elif f.suffix.lower() == '.pdf':
-            sizes = pdf_image_sizes(f)
-            if len(sizes) == 0:
-                print(f'\tContains no images')
-            else:
-                print(f'\t{len(sizes)} images, median {fmt_size(median(sizes))}, min {fmt_size(min(sizes))}, max {fmt_size(max(sizes))}')
-        else:
-            print()
-
 def metadata(args):
     con = sqlite3.connect(args.destdir / 'meta.db')
     cur = con.cursor()
@@ -728,7 +641,7 @@ argparser = argparse.ArgumentParser(
 argparser.add_argument(
     '-d', '--destdir',
     type=Path,
-    default=Path(os.getenv('DLIBRARY_DIR', './dlibrary')),
+    default=Path(getenv('DLIBRARY_DIR', './dlibrary')),
     help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
 )
 subparsers = argparser.add_subparsers(title="subcommands", required=True)
@@ -752,7 +665,7 @@ parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnail
 parser_fetch.add_argument(
     '-l', '--locale',
     type=str,
-    default=os.getenv('DLIBRARY_LOCALE', 'en_US'),
+    default=getenv('DLIBRARY_LOCALE', 'en_US'),
     help=('locale to use when requesting metadata (e.g. "ja_JP", "en_US"). '
           'May still fall back to Japanese if metadata in other languages is unavailable. '
           '(default: $DLIBRARY_LOCALE or en_US)'),
@@ -835,10 +748,6 @@ parser_manual_collate.add_argument(
 )
 parser_manual_collate.set_defaults(func=manual_collate)
 
-parser_analyze = subparsers.add_parser('analyze', help='analyze an extracted folder to assist in collation')
-parser_analyze.add_argument('work_id')
-parser_analyze.set_defaults(func=analyze)
-
 parser_metadata = subparsers.add_parser('metadata', help='view or modify metadata for a work')
 parser_metadata.add_argument('work_id')
 parser_metadata.add_argument(
diff --git a/flake.nix b/flake.nix
index 4ae830b..63d5f48 100644
--- a/flake.nix
+++ b/flake.nix
@@ -51,7 +51,6 @@
         pyproject = true;
         propagatedBuildInputs = [
           pymupdf
-          pillow
           requests
           dlsite-async
           jinja2
diff --git a/pyproject.toml b/pyproject.toml
index 7ecfcfd..58df9bd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,6 @@ authors = [{name = "xenofem"}]
 dependencies = [
     "requests",
     "PyMuPDF",
-    "pillow",
     "dlsite-async",
     "jinja2",
     "importlib_resources",