From 1f15abed9a8bab30a81a41cc7e7230dc61f948b1 Mon Sep 17 00:00:00 2001 From: xenofem Date: Tue, 23 Jan 2024 15:54:17 -0500 Subject: [PATCH] add more detailed help and allow configuring destdir with environment variable --- dlibrary/dlibrary.py | 111 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 101 insertions(+), 10 deletions(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index 46ffd99..e6e910a 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -4,10 +4,12 @@ import argparse import asyncio import importlib_resources as resources from pathlib import Path +from os import getenv from os.path import relpath, splitext import re import shutil import sqlite3 +import textwrap from urllib.parse import urlparse import zipfile @@ -301,7 +303,7 @@ def copy_contents(src, dest): for item in src.iterdir(): shutil.copyfile(item, dest / item.name) -def publish(args): +def generate(args): jenv = Environment( loader=PackageLoader("dlibrary"), autoescape=select_autoescape() @@ -408,12 +410,33 @@ def publish(args): con.close() -argparser = argparse.ArgumentParser(prog='dlibrary') +argparser = argparse.ArgumentParser( + prog='dlibrary', + formatter_class=argparse.RawDescriptionHelpFormatter, + description=textwrap.dedent("""\ + Organize DRM-free works purchased from DLSite into a library + that can be viewed in a web browser. + + Intended workflow: + - `extract` a collection of zipfiles downloaded from DLSite + into DLibrary's data directory, giving each work its own + subfolder. + - `fetch` metadata and thumbnail images for extracted works + from DLSite. + - `collate` and/or `manual-collate` extracted works, + producing a single sequence of image files (or symlinks + into the extracted data, when possible) for each work. + - Manually adjust works' `metadata` when necessary. + - `generate` a static website providing a catalog and viewer + for all collated works. + """), +) + argparser.add_argument( '-d', '--destdir', type=Path, - default=Path('./dlibrary'), - help='directory to store dlibrary content and metadata to (default: ./dlibrary)', + default=Path(getenv('DLIBRARY_DIR', './dlibrary')), + help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)', ) subparsers = argparser.add_subparsers(title="subcommands", required=True) @@ -435,23 +458,79 @@ parser_extract.set_defaults(func=extract) parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnails') parser_fetch.set_defaults(func=fetch) -parser_collate = subparsers.add_parser('collate', help='collate a single sequence of image files for each work') +parser_collate = subparsers.add_parser( + 'collate', + help='collate each work into a sequence of image files', + formatter_class=argparse.RawDescriptionHelpFormatter, + description=textwrap.dedent("""\ + For each extracted work that has not already been collated, + DLibrary will attempt to intuit its structure as follows: + + - Enter the work's directory. If the directory contains + nothing except a single subdirectory (ignoring a few types + of files that are definitely not relevant), traverse + downwards repeatedly. + - If the current directory contains nothing except a single + PDF (again, ignoring irrelevant files), attempt to extract + a series of images from the PDF. This process expects that + each page of the PDF consists of a single embedded image, + which will be extracted at full resolution. Support for + more complex PDFs is not yet implemented. + - If the current directory contains nothing except image + files, and the image files are named in a way that clearly + indicates a complete numerical order (each filename + consists of a shared prefix followed by a distinct + number), symlink files in the inferred order. + - Otherwise, skip processing this work for now. + + DLibrary can be given "collation hints" which provide + alternative starting points for this search process. A hint + is a path under $DLIBRARY_DIR/extract/[work id]/ + indicating a different directory or PDF file to begin the + search process for that work, rather than starting at the + top level of the extracted data. There can be at most one + hint per work; for more complicated scenarios where a work + includes multiple folders that need to be collated together, + or where filenames do not clearly indicate an ordering, use + `manual-collate` instead. + """), +) parser_collate.add_argument( 'hints', metavar='PATH', type=Path, nargs='*', - help='manually-specified paths of subdirectories or PDFs within extraction folders, at most one per work', + help='paths within extraction folders as collation hints' ) parser_collate.set_defaults(func=collate) -parser_manual_collate = subparsers.add_parser('manual-collate', help='collate a specific work manually, specifying all paths to include') +parser_manual_collate = subparsers.add_parser( + 'manual-collate', + help='collate a single work manually', + formatter_class=argparse.RawDescriptionHelpFormatter, + description=textwrap.dedent("""\ + All provided paths must be under $DLIBRARY_DIR/extract/[work id]/ + for the work being manually collated. `manual-collate` can + only handle one work at a time. Paths are used as follows: + + - If a path is a directory, all *image files* immediately + inside that directory will be appended to the sequence. If + files are named in a way which indicates a clear ordering, + that ordering will be used. Otherwise, filenames will be + sorted lexicographically. Non-image files and + subdirectories will be ignored. + - If a path is an image file, that image file will be + appended to the sequence. + - If a path is a PDF file, page images will be extracted + from that PDF and appended to the sequence. +"""), +) parser_manual_collate.add_argument( 'paths', metavar='PATH', type=Path, nargs='+', - help='paths of files (images to symlink, pdfs to extract) or directories (symlink all images in the directory, no recursion, best-effort sorting)' + help='paths within a single work to be collated in sequence', ) parser_manual_collate.set_defaults(func=manual_collate) @@ -464,8 +543,20 @@ parser_metadata.add_argument( ) parser_metadata.set_defaults(func=metadata) -parser_publish = subparsers.add_parser('publish', help='generate HTML/CSS/JS for library site') -parser_publish.set_defaults(func=publish) +parser_generate = subparsers.add_parser( + 'generate', + help='generate HTML/CSS/JS for library site', + formatter_class=argparse.RawDescriptionHelpFormatter, + description=textwrap.dedent("""\ + The static site will be generated under $DLIBRARY_DIR/site/ + and can be served by pointing an HTTP server at that + directory. Note that some files inside the static site + hierarchy will be symlinks into $DLIBRARY_DIR/extract/ + outside the site hierarchy, so make sure your HTTP server + will allow those symlinks to be read. + """), +) +parser_generate.set_defaults(func=generate) def main(): args = argparser.parse_args()