Compare commits
No commits in common. "c2f516a281d5a2b9c8c2cc3d58dd055be7dde5ec" and "9cc51ace4a4a8a886ed6c767300ffa1655c135a1" have entirely different histories.
c2f516a281
...
9cc51ace4a
3 changed files with 5 additions and 98 deletions
|
@ -4,7 +4,7 @@ import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import importlib_resources as resources
|
import importlib_resources as resources
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import os
|
from os import getenv
|
||||||
from os.path import relpath, splitext
|
from os.path import relpath, splitext
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
@ -15,7 +15,6 @@ import zipfile
|
||||||
|
|
||||||
from dlsite_async import DlsiteAPI
|
from dlsite_async import DlsiteAPI
|
||||||
import fitz
|
import fitz
|
||||||
from PIL import Image
|
|
||||||
from jinja2 import Environment, PackageLoader, select_autoescape
|
from jinja2 import Environment, PackageLoader, select_autoescape
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
@ -44,7 +43,7 @@ ALT_VERSIONS = [
|
||||||
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
|
IMAGE_FILE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.tiff']
|
||||||
|
|
||||||
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
|
IGNOREABLE_FILES = ['Thumbs.db', '__MACOSX', '.DS_Store']
|
||||||
IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm', '.psd', '.mp4']
|
IGNOREABLE_EXTENSIONS = ['.txt', '.html', '.htm', '.psd']
|
||||||
|
|
||||||
def open_zipfile_with_encoding(path):
|
def open_zipfile_with_encoding(path):
|
||||||
try:
|
try:
|
||||||
|
@ -326,25 +325,12 @@ def link_ordered_files(ordering, dest, start_index):
|
||||||
link_path = dest / f'{idx:04d}{ext}'
|
link_path = dest / f'{idx:04d}{ext}'
|
||||||
link_path.symlink_to(relpath(src_path, dest))
|
link_path.symlink_to(relpath(src_path, dest))
|
||||||
|
|
||||||
def ignoreable(path):
    """Report whether *path* should be skipped when listing a directory.

    A path is skipped when its file name is listed in IGNOREABLE_FILES or
    when its lowercased suffix is listed in IGNOREABLE_EXTENSIONS.
    """
    if path.name in IGNOREABLE_FILES:
        return True
    return path.suffix.lower() in IGNOREABLE_EXTENSIONS
|
|
||||||
|
|
||||||
def ls_ignore(directory):
|
def ls_ignore(directory):
|
||||||
return [
|
return [
|
||||||
path for path in directory.iterdir()
|
path for path in directory.iterdir()
|
||||||
if not ignoreable(path)
|
if path.name not in IGNOREABLE_FILES and path.suffix.lower() not in IGNOREABLE_EXTENSIONS
|
||||||
]
|
]
|
||||||
|
|
||||||
def descendant_files_ignore(directory):
    """Collect every non-directory entry beneath *directory*, recursively.

    Entries are obtained through ls_ignore() at every level, so whatever
    that helper filters out is never visited. Subdirectories are expanded
    in place, in the order ls_ignore() yields them.

    Returns a flat list of paths.
    """
    found = []
    for entry in ls_ignore(directory):
        if not entry.is_dir():
            found.append(entry)
            continue
        # Directories contribute their own descendants, not themselves.
        found += descendant_files_ignore(entry)
    return found
|
|
||||||
|
|
||||||
def collate(args):
|
def collate(args):
|
||||||
con = sqlite3.connect(args.destdir / 'meta.db')
|
con = sqlite3.connect(args.destdir / 'meta.db')
|
||||||
cur = con.cursor()
|
cur = con.cursor()
|
||||||
|
@ -406,29 +392,6 @@ def collate_regex_later(srcs, dest, regex, start_index):
|
||||||
return None
|
return None
|
||||||
return nonmatching_pages + matching_pages
|
return nonmatching_pages + matching_pages
|
||||||
|
|
||||||
def standalone_image_size(filepath):
    """Open the image at *filepath* and return its ``size`` attribute.

    The image is opened in a ``with`` block, so it is closed before
    returning.
    """
    with Image.open(filepath) as image:
        dimensions = image.size
    return dimensions
|
|
||||||
|
|
||||||
def pdf_image_sizes(filepath):
    """List the (width, height) of each distinct image referenced by a PDF.

    Every page of the document at *filepath* is scanned with
    ``page.get_images()``. Entries are keyed by their first field (the
    xref), so an image that shows up on several pages is only counted
    once, with the size from its first occurrence.

    Returns a list of (width, height) tuples in first-seen order.
    """
    first_seen = {}

    with fitz.open(filepath) as document:
        for page in document:
            for image_entry in page.get_images():
                xref, _, width, height, *_ = image_entry
                # setdefault keeps the size recorded the first time this
                # xref was encountered and ignores later repeats.
                first_seen.setdefault(xref, (width, height))

    return list(first_seen.values())
|
|
||||||
|
|
||||||
def median(items):
    """Return the median element of *items*, or None when it is empty.

    For an even number of elements the upper of the two middle elements
    is returned (index ``len(items) // 2`` of the sorted sequence); no
    averaging is done, so elements only need to be orderable.

    The input is not modified: a sorted copy is used, so callers can keep
    relying on the original order of their list afterwards.
    """
    if len(items) == 0:
        return None

    # sorted() builds a new list; list.sort() would reorder the caller's data.
    ordered = sorted(items)
    return ordered[len(ordered) // 2]
|
|
||||||
|
|
||||||
def collate_from_paths(srcs, dest, start_index):
|
def collate_from_paths(srcs, dest, start_index):
|
||||||
if len(srcs) == 1 and srcs[0].is_dir():
|
if len(srcs) == 1 and srcs[0].is_dir():
|
||||||
return collate_from_paths(ls_ignore(srcs[0]), dest, start_index)
|
return collate_from_paths(ls_ignore(srcs[0]), dest, start_index)
|
||||||
|
@ -457,34 +420,6 @@ def collate_from_paths(srcs, dest, start_index):
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
pdfs = [src for src in srcs if src.is_file() and src.suffix.lower() == '.pdf']
|
|
||||||
if len(pdfs) == 1:
|
|
||||||
pdf = pdfs[0]
|
|
||||||
images = []
|
|
||||||
non_images = []
|
|
||||||
descendant_files = [
|
|
||||||
src for src in srcs if src != pdf and src.is_file()
|
|
||||||
] + [
|
|
||||||
f for src in srcs if src.is_dir() for f in descendant_files_ignore(src)
|
|
||||||
]
|
|
||||||
for f in descendant_files:
|
|
||||||
if f.suffix.lower() in IMAGE_FILE_EXTENSIONS:
|
|
||||||
images.append(f)
|
|
||||||
else:
|
|
||||||
non_images.append(f)
|
|
||||||
break
|
|
||||||
if len(non_images) == 0 and len(images) > 0:
|
|
||||||
pdf_sizes = pdf_image_sizes(pdf)
|
|
||||||
standalone_sizes = [standalone_image_size(f) for f in images]
|
|
||||||
if abs(len(pdf_sizes) - len(standalone_sizes)) <= 2:
|
|
||||||
median_pdf_size = median(pdf_sizes)
|
|
||||||
median_standalone_size = median(standalone_sizes)
|
|
||||||
if median_pdf_size and median_standalone_size:
|
|
||||||
if median_standalone_size[0] >= median_pdf_size[0] and median_standalone_size[1] >= median_pdf_size[1]:
|
|
||||||
return collate_from_paths([src for src in srcs if src != pdf], dest, start_index)
|
|
||||||
if median_pdf_size[0] >= median_standalone_size[0] and median_pdf_size[1] >= median_standalone_size[1]:
|
|
||||||
return collate_from_paths([pdf], dest, start_index)
|
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def self_and_parents(path):
|
def self_and_parents(path):
|
||||||
|
@ -527,28 +462,6 @@ def manual_collate(args):
|
||||||
print(f'Unknown file type {path}, stopping')
|
print(f'Unknown file type {path}, stopping')
|
||||||
return
|
return
|
||||||
|
|
||||||
def fmt_size(s):
    """Format a size as ``<first>x<second>px`` from the first two items of *s*."""
    label = f'{s[0]}x{s[1]}px'
    return label
|
|
||||||
|
|
||||||
def analyze(args):
    """Print a per-file size report for one extracted work.

    Reads ``args.destdir`` and ``args.work_id``. Every file found under
    ``<destdir>/extract/<work_id>`` by descendant_files_ignore() is printed
    in sorted order, as a path relative to the extract directory, followed
    by:

    * for extensions in IMAGE_FILE_EXTENSIONS: a tab and the image size;
    * for ``.pdf`` files: a tab and either a count of embedded images with
      their median/min/max sizes, or a note that there are none;
    * for anything else: nothing further.
    """
    extract_dir = args.destdir / 'extract'
    files = sorted(descendant_files_ignore(extract_dir / args.work_id))

    for f in files:
        # The path is printed first without a newline; the branch below
        # finishes the line.
        print(f'{relpath(f, extract_dir)}', end='')
        suffix = f.suffix.lower()

        if suffix in IMAGE_FILE_EXTENSIONS:
            size = standalone_image_size(f)
            print(f'\t{fmt_size(size)}')
            continue

        if suffix != '.pdf':
            print()
            continue

        sizes = pdf_image_sizes(f)
        if len(sizes) == 0:
            print(f'\tContains no images')
            continue
        print(f'\t{len(sizes)} images, median {fmt_size(median(sizes))}, min {fmt_size(min(sizes))}, max {fmt_size(max(sizes))}')
|
|
||||||
|
|
||||||
def metadata(args):
|
def metadata(args):
|
||||||
con = sqlite3.connect(args.destdir / 'meta.db')
|
con = sqlite3.connect(args.destdir / 'meta.db')
|
||||||
cur = con.cursor()
|
cur = con.cursor()
|
||||||
|
@ -728,7 +641,7 @@ argparser = argparse.ArgumentParser(
|
||||||
argparser.add_argument(
|
argparser.add_argument(
|
||||||
'-d', '--destdir',
|
'-d', '--destdir',
|
||||||
type=Path,
|
type=Path,
|
||||||
default=Path(os.getenv('DLIBRARY_DIR', './dlibrary')),
|
default=Path(getenv('DLIBRARY_DIR', './dlibrary')),
|
||||||
help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
|
help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
|
||||||
)
|
)
|
||||||
subparsers = argparser.add_subparsers(title="subcommands", required=True)
|
subparsers = argparser.add_subparsers(title="subcommands", required=True)
|
||||||
|
@ -752,7 +665,7 @@ parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnail
|
||||||
parser_fetch.add_argument(
|
parser_fetch.add_argument(
|
||||||
'-l', '--locale',
|
'-l', '--locale',
|
||||||
type=str,
|
type=str,
|
||||||
default=os.getenv('DLIBRARY_LOCALE', 'en_US'),
|
default=getenv('DLIBRARY_LOCALE', 'en_US'),
|
||||||
help=('locale to use when requesting metadata (e.g. "ja_JP", "en_US"). '
|
help=('locale to use when requesting metadata (e.g. "ja_JP", "en_US"). '
|
||||||
'May still fall back to Japanese if metadata in other languages is unavailable. '
|
'May still fall back to Japanese if metadata in other languages is unavailable. '
|
||||||
'(default: $DLIBRARY_LOCALE or en_US)'),
|
'(default: $DLIBRARY_LOCALE or en_US)'),
|
||||||
|
@ -835,10 +748,6 @@ parser_manual_collate.add_argument(
|
||||||
)
|
)
|
||||||
parser_manual_collate.set_defaults(func=manual_collate)
|
parser_manual_collate.set_defaults(func=manual_collate)
|
||||||
|
|
||||||
parser_analyze = subparsers.add_parser('analyze', help='analyze an extracted folder to assist in collation')
|
|
||||||
parser_analyze.add_argument('work_id')
|
|
||||||
parser_analyze.set_defaults(func=analyze)
|
|
||||||
|
|
||||||
parser_metadata = subparsers.add_parser('metadata', help='view or modify metadata for a work')
|
parser_metadata = subparsers.add_parser('metadata', help='view or modify metadata for a work')
|
||||||
parser_metadata.add_argument('work_id')
|
parser_metadata.add_argument('work_id')
|
||||||
parser_metadata.add_argument(
|
parser_metadata.add_argument(
|
||||||
|
|
|
@ -51,7 +51,6 @@
|
||||||
pyproject = true;
|
pyproject = true;
|
||||||
propagatedBuildInputs = [
|
propagatedBuildInputs = [
|
||||||
pymupdf
|
pymupdf
|
||||||
pillow
|
|
||||||
requests
|
requests
|
||||||
dlsite-async
|
dlsite-async
|
||||||
jinja2
|
jinja2
|
||||||
|
|
|
@ -7,7 +7,6 @@ authors = [{name = "xenofem"}]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"requests",
|
"requests",
|
||||||
"PyMuPDF",
|
"PyMuPDF",
|
||||||
"pillow",
|
|
||||||
"dlsite-async",
|
"dlsite-async",
|
||||||
"jinja2",
|
"jinja2",
|
||||||
"importlib_resources",
|
"importlib_resources",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue