From 1f15abed9a8bab30a81a41cc7e7230dc61f948b1 Mon Sep 17 00:00:00 2001
From: xenofem <xenofem@xeno.science>
Date: Tue, 23 Jan 2024 15:54:17 -0500
Subject: [PATCH] add more detailed help and allow configuring destdir with
 environment variable

---
 dlibrary/dlibrary.py | 111 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 101 insertions(+), 10 deletions(-)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index 46ffd99..e6e910a 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -4,10 +4,12 @@ import argparse
 import asyncio
 import importlib_resources as resources
 from pathlib import Path
+from os import getenv
 from os.path import relpath, splitext
 import re
 import shutil
 import sqlite3
+import textwrap
 from urllib.parse import urlparse
 import zipfile
 
@@ -301,7 +303,7 @@ def copy_contents(src, dest):
     for item in src.iterdir():
         shutil.copyfile(item, dest / item.name)
 
-def publish(args):
+def generate(args):
     jenv = Environment(
         loader=PackageLoader("dlibrary"),
         autoescape=select_autoescape()
@@ -408,12 +410,33 @@ def publish(args):
     con.close()
 
 
-argparser = argparse.ArgumentParser(prog='dlibrary')
+argparser = argparse.ArgumentParser(
+    prog='dlibrary',
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    description=textwrap.dedent("""\
+    Organize DRM-free works purchased from DLSite into a library
+    that can be viewed in a web browser.
+
+    Intended workflow:
+    - `extract` a collection of zipfiles downloaded from DLSite
+      into DLibrary's data directory, giving each work its own
+      subfolder.
+    - `fetch` metadata and thumbnail images for extracted works
+      from DLSite.
+    - `collate` and/or `manual-collate` extracted works,
+      producing a single sequence of image files (or symlinks
+      into the extracted data, when possible) for each work.
+    - Manually adjust works' `metadata` when necessary.
+    - `generate` a static website providing a catalog and viewer
+      for all collated works.
+    """),
+)
+
 argparser.add_argument(
     '-d', '--destdir',
     type=Path,
-    default=Path('./dlibrary'),
-    help='directory to store dlibrary content and metadata to (default: ./dlibrary)',
+    default=Path(getenv('DLIBRARY_DIR', './dlibrary')),
+    help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
 )
 subparsers = argparser.add_subparsers(title="subcommands", required=True)
 
@@ -435,23 +458,79 @@ parser_extract.set_defaults(func=extract)
 parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnails')
 parser_fetch.set_defaults(func=fetch)
 
-parser_collate = subparsers.add_parser('collate', help='collate a single sequence of image files for each work')
+parser_collate = subparsers.add_parser(
+    'collate',
+    help='collate each work into a sequence of image files',
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    description=textwrap.dedent("""\
+    For each extracted work that has not already been collated,
+    DLibrary will attempt to intuit its structure as follows:
+
+    - Enter the work's directory. If the directory contains
+      nothing except a single subdirectory (ignoring a few types
+      of files that are definitely not relevant), traverse
+      downwards repeatedly.
+    - If the current directory contains nothing except a single
+      PDF (again, ignoring irrelevant files), attempt to extract
+      a series of images from the PDF. This process expects that
+      each page of the PDF consists of a single embedded image,
+      which will be extracted at full resolution. Support for
+      more complex PDFs is not yet implemented.
+    - If the current directory contains nothing except image
+      files, and the image files are named in a way that clearly
+      indicates a complete numerical order (each filename
+      consists of a shared prefix followed by a distinct
+      number), symlink files in the inferred order.
+    - Otherwise, skip processing this work for now.
+
+    DLibrary can be given "collation hints" which provide
+    alternative starting points for this search process. A hint
+    is a path under $DLIBRARY_DIR/extract/[work id]/
+    indicating a different directory or PDF file to begin the
+    search process for that work, rather than starting at the
+    top level of the extracted data. There can be at most one
+    hint per work; for more complicated scenarios where a work
+    includes multiple folders that need to be collated together,
+    or where filenames do not clearly indicate an ordering, use
+    `manual-collate` instead.
+    """),
+)
 parser_collate.add_argument(
     'hints',
     metavar='PATH',
     type=Path,
     nargs='*',
-    help='manually-specified paths of subdirectories or PDFs within extraction folders, at most one per work',
+    help='paths within extraction folders as collation hints'
 )
 parser_collate.set_defaults(func=collate)
 
-parser_manual_collate = subparsers.add_parser('manual-collate', help='collate a specific work manually, specifying all paths to include')
+parser_manual_collate = subparsers.add_parser(
+    'manual-collate',
+    help='collate a single work manually',
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    description=textwrap.dedent("""\
+    All provided paths must be under $DLIBRARY_DIR/extract/[work id]/
+    for the work being manually collated. `manual-collate` can
+    only handle one work at a time. Paths are used as follows:
+
+    - If a path is a directory, all *image files* immediately
+      inside that directory will be appended to the sequence. If
+      files are named in a way which indicates a clear ordering,
+      that ordering will be used. Otherwise, filenames will be
+      sorted lexicographically. Non-image files and
+      subdirectories will be ignored.
+    - If a path is an image file, that image file will be
+      appended to the sequence.
+    - If a path is a PDF file, page images will be extracted
+      from that PDF and appended to the sequence.
+    """),
+)
 parser_manual_collate.add_argument(
     'paths',
     metavar='PATH',
     type=Path,
     nargs='+',
-    help='paths of files (images to symlink, pdfs to extract) or directories (symlink all images in the directory, no recursion, best-effort sorting)'
+    help='paths within a single work to be collated in sequence',
 )
 parser_manual_collate.set_defaults(func=manual_collate)
 
@@ -464,8 +543,20 @@ parser_metadata.add_argument(
 )
 parser_metadata.set_defaults(func=metadata)
 
-parser_publish = subparsers.add_parser('publish', help='generate HTML/CSS/JS for library site')
-parser_publish.set_defaults(func=publish)
+parser_generate = subparsers.add_parser(
+    'generate',
+    help='generate HTML/CSS/JS for library site',
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    description=textwrap.dedent("""\
+    The static site will be generated under $DLIBRARY_DIR/site/
+    and can be served by pointing an HTTP server at that
+    directory. Note that some files inside the static site
+    hierarchy will be symlinks into $DLIBRARY_DIR/extract/
+    outside the site hierarchy, so make sure your HTTP server
+    will allow those symlinks to be read.
+    """),
+)
+parser_generate.set_defaults(func=generate)
 
 def main():
     args = argparser.parse_args()