|
|
@ -4,7 +4,7 @@ import argparse
|
|
|
|
import asyncio
|
|
|
|
import asyncio
|
|
|
|
import importlib_resources as resources
|
|
|
|
import importlib_resources as resources
|
|
|
|
from pathlib import Path
|
|
|
|
from pathlib import Path
|
|
|
|
import os
|
|
|
|
from os import getenv
|
|
|
|
from os.path import relpath, splitext
|
|
|
|
from os.path import relpath, splitext
|
|
|
|
import re
|
|
|
|
import re
|
|
|
|
import shutil
|
|
|
|
import shutil
|
|
|
@ -15,7 +15,6 @@ import zipfile
|
|
|
|
|
|
|
|
|
|
|
|
from dlsite_async import DlsiteAPI
|
|
|
|
from dlsite_async import DlsiteAPI
|
|
|
|
import fitz
|
|
|
|
import fitz
|
|
|
|
from PIL import Image
|
|
|
|
|
|
|
|
from jinja2 import Environment, PackageLoader, select_autoescape
|
|
|
|
from jinja2 import Environment, PackageLoader, select_autoescape
|
|
|
|
import requests
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
|
@ -44,7 +43,7 @@ ALT_VERSIONS = [
|
|
|
|
# Extensions treated as page images by ls_ignore/descendant_files_ignore consumers.
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']

# Junk entries that archive tools and operating systems leave behind.
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']

# Extensions whose files are skipped entirely during directory walks.
IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm', '.psd']


def ignoreable(path):
    """Return True if *path* should be skipped (junk name or ignored extension)."""
    if path.name in IGNOREABLE_FILES:
        return True
    return path.suffix.lower() in IGNOREABLE_EXTENSIONS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def ls_ignore(directory):
    """Return *directory*'s immediate entries, excluding ignoreable ones.

    Order follows directory.iterdir(); no recursion is performed here.
    """
    entries = []
    for entry in directory.iterdir():
        if not ignoreable(entry):
            entries.append(entry)
    return entries
|
|
|
|
|
|
|
|
|
|
|
|
def descendant_files_ignore(directory):
    """Recursively collect every non-ignoreable file below *directory*.

    Directories are descended into; only file paths appear in the result.
    """
    files = []
    for entry in ls_ignore(directory):
        if entry.is_dir():
            files += descendant_files_ignore(entry)
        else:
            files.append(entry)
    return files
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def collate(args):
|
|
|
|
def collate(args):
|
|
|
|
con = sqlite3.connect(args.destdir / 'meta.db')
|
|
|
|
con = sqlite3.connect(args.destdir / 'meta.db')
|
|
|
|
cur = con.cursor()
|
|
|
|
cur = con.cursor()
|
|
|
@ -406,29 +392,6 @@ def collate_regex_later(srcs, dest, regex, start_index):
|
|
|
|
return None
|
|
|
|
return None
|
|
|
|
return nonmatching_pages + matching_pages
|
|
|
|
return nonmatching_pages + matching_pages
|
|
|
|
|
|
|
|
|
|
|
|
def standalone_image_size(filepath):
    """Return the (width, height) of the image file at *filepath*."""
    image = Image.open(filepath)
    with image:
        return image.size
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def pdf_image_sizes(filepath):
    """Return the (width, height) of each distinct image embedded in a PDF.

    Images are deduplicated by xref; the first occurrence wins, and the
    result preserves first-seen order across the document's pages.
    """
    sizes = {}
    with fitz.open(filepath) as doc:
        for page in doc:
            for info in page.get_images():
                xref, width, height = info[0], info[2], info[3]
                if xref not in sizes:
                    sizes[xref] = (width, height)
    return list(sizes.values())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def median(items):
    """Return the upper median of *items*, or None when *items* is empty.

    For an even-length list this returns the upper of the two middle
    elements (index len // 2 of the sorted order).

    Fix: the previous implementation called items.sort(), silently
    reordering the caller's list as a side effect; sorted() leaves the
    argument untouched.
    """
    if not items:
        return None
    ordered = sorted(items)
    return ordered[len(ordered) // 2]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def collate_from_paths(srcs, dest, start_index):
|
|
|
|
def collate_from_paths(srcs, dest, start_index):
|
|
|
|
if len(srcs) == 1 and srcs[0].is_dir():
|
|
|
|
if len(srcs) == 1 and srcs[0].is_dir():
|
|
|
|
return collate_from_paths(ls_ignore(srcs[0]), dest, start_index)
|
|
|
|
return collate_from_paths(ls_ignore(srcs[0]), dest, start_index)
|
|
|
@ -457,34 +420,6 @@ def collate_from_paths(srcs, dest, start_index):
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
return None
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
pdfs = [src for src in srcs if src.is_file() and src.suffix.lower() == '.pdf']
|
|
|
|
|
|
|
|
if len(pdfs) == 1:
|
|
|
|
|
|
|
|
pdf = pdfs[0]
|
|
|
|
|
|
|
|
images = []
|
|
|
|
|
|
|
|
non_images = []
|
|
|
|
|
|
|
|
descendant_files = [
|
|
|
|
|
|
|
|
src for src in srcs if src != pdf and src.is_file()
|
|
|
|
|
|
|
|
] + [
|
|
|
|
|
|
|
|
f for src in srcs if src.is_dir() for f in descendant_files_ignore(src)
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
for f in descendant_files:
|
|
|
|
|
|
|
|
if f.suffix.lower() in IMAGE_FILE_EXTENSIONS:
|
|
|
|
|
|
|
|
images.append(f)
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
non_images.append(f)
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
if len(non_images) == 0 and len(images) > 0:
|
|
|
|
|
|
|
|
pdf_sizes = pdf_image_sizes(pdf)
|
|
|
|
|
|
|
|
standalone_sizes = [standalone_image_size(f) for f in images]
|
|
|
|
|
|
|
|
if abs(len(pdf_sizes) - len(standalone_sizes)) <= 2:
|
|
|
|
|
|
|
|
median_pdf_size = median(pdf_sizes)
|
|
|
|
|
|
|
|
median_standalone_size = median(standalone_sizes)
|
|
|
|
|
|
|
|
if median_pdf_size and median_standalone_size:
|
|
|
|
|
|
|
|
if median_standalone_size[0] >= median_pdf_size[0] and median_standalone_size[1] >= median_pdf_size[1]:
|
|
|
|
|
|
|
|
return collate_from_paths([src for src in srcs if src != pdf], dest, start_index)
|
|
|
|
|
|
|
|
if median_pdf_size[0] >= median_standalone_size[0] and median_pdf_size[1] >= median_standalone_size[1]:
|
|
|
|
|
|
|
|
return collate_from_paths([pdf], dest, start_index)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return None
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def self_and_parents(path):
|
|
|
|
def self_and_parents(path):
|
|
|
@ -527,28 +462,6 @@ def manual_collate(args):
|
|
|
|
print(f'Unknown file type {path}, stopping')
|
|
|
|
print(f'Unknown file type {path}, stopping')
|
|
|
|
return
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def fmt_size(s):
    """Format a (width, height) pair as e.g. '640x480px'."""
    return '{}x{}px'.format(s[0], s[1])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def analyze(args):
    """Print every extracted file of a work with image-size details.

    Walks extract/<work_id> under args.destdir, listing each file's path
    relative to the extract dir; image files get their pixel size, PDFs get
    a count plus median/min/max embedded-image sizes, anything else just a
    newline. Output goes to stdout to assist manual collation decisions.
    """
    extract_dir = args.destdir / 'extract'
    files = descendant_files_ignore(extract_dir / args.work_id)
    files.sort()
    for f in files:
        print(f'{relpath(f, extract_dir)}', end='')
        suffix = f.suffix.lower()
        if suffix in IMAGE_FILE_EXTENSIONS:
            print(f'\t{fmt_size(standalone_image_size(f))}')
        elif suffix == '.pdf':
            sizes = pdf_image_sizes(f)
            if sizes:
                print(f'\t{len(sizes)} images, median {fmt_size(median(sizes))}, min {fmt_size(min(sizes))}, max {fmt_size(max(sizes))}')
            else:
                print(f'\tContains no images')
        else:
            print()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def metadata(args):
|
|
|
|
def metadata(args):
|
|
|
|
con = sqlite3.connect(args.destdir / 'meta.db')
|
|
|
|
con = sqlite3.connect(args.destdir / 'meta.db')
|
|
|
|
cur = con.cursor()
|
|
|
|
cur = con.cursor()
|
|
|
@ -728,7 +641,7 @@ argparser = argparse.ArgumentParser(
|
|
|
|
argparser.add_argument(
|
|
|
|
argparser.add_argument(
|
|
|
|
'-d', '--destdir',
|
|
|
|
'-d', '--destdir',
|
|
|
|
type=Path,
|
|
|
|
type=Path,
|
|
|
|
default=Path(os.getenv('DLIBRARY_DIR', './dlibrary')),
|
|
|
|
default=Path(getenv('DLIBRARY_DIR', './dlibrary')),
|
|
|
|
help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
|
|
|
|
help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
|
|
|
|
)
|
|
|
|
)
|
|
|
|
subparsers = argparser.add_subparsers(title="subcommands", required=True)
|
|
|
|
subparsers = argparser.add_subparsers(title="subcommands", required=True)
|
|
|
@ -752,7 +665,7 @@ parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnail
|
|
|
|
parser_fetch.add_argument(
|
|
|
|
parser_fetch.add_argument(
|
|
|
|
'-l', '--locale',
|
|
|
|
'-l', '--locale',
|
|
|
|
type=str,
|
|
|
|
type=str,
|
|
|
|
default=os.getenv('DLIBRARY_LOCALE', 'en_US'),
|
|
|
|
default=getenv('DLIBRARY_LOCALE', 'en_US'),
|
|
|
|
help=('locale to use when requesting metadata (e.g. "ja_JP", "en_US"). '
|
|
|
|
help=('locale to use when requesting metadata (e.g. "ja_JP", "en_US"). '
|
|
|
|
'May still fall back to Japanese if metadata in other languages is unavailable. '
|
|
|
|
'May still fall back to Japanese if metadata in other languages is unavailable. '
|
|
|
|
'(default: $DLIBRARY_LOCALE or en_US)'),
|
|
|
|
'(default: $DLIBRARY_LOCALE or en_US)'),
|
|
|
@ -835,10 +748,6 @@ parser_manual_collate.add_argument(
|
|
|
|
)
|
|
|
|
)
|
|
|
|
parser_manual_collate.set_defaults(func=manual_collate)
|
|
|
|
parser_manual_collate.set_defaults(func=manual_collate)
|
|
|
|
|
|
|
|
|
|
|
|
parser_analyze = subparsers.add_parser('analyze', help='analyze an extracted folder to assist in collation')
|
|
|
|
|
|
|
|
parser_analyze.add_argument('work_id')
|
|
|
|
|
|
|
|
parser_analyze.set_defaults(func=analyze)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
parser_metadata = subparsers.add_parser('metadata', help='view or modify metadata for a work')
|
|
|
|
parser_metadata = subparsers.add_parser('metadata', help='view or modify metadata for a work')
|
|
|
|
parser_metadata.add_argument('work_id')
|
|
|
|
parser_metadata.add_argument('work_id')
|
|
|
|
parser_metadata.add_argument(
|
|
|
|
parser_metadata.add_argument(
|
|
|
|