Add more detailed help and allow configuring destdir with the DLIBRARY_DIR environment variable

2024-01-23 15:54:17 -05:00 · 2024-01-23 15:54:17 -05:00 · 1f15abed9a
commit 1f15abed9a
parent fb7d275ebb
1 changed file with 101 additions and 10 deletions
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@ -4,10 +4,12 @@ import argparse
 import asyncio
 import importlib_resources as resources
 from pathlib import Path
 from os import getenv
 from os.path import relpath, splitext
 import re
 import shutil
 import sqlite3
 import textwrap
 from urllib.parse import urlparse
 import zipfile
@ -301,7 +303,7 @@ def copy_contents(src, dest):
    for item in src.iterdir():
        shutil.copyfile(item, dest / item.name)
-def publish(args):
+def generate(args):
    jenv = Environment(
        loader=PackageLoader("dlibrary"),
        autoescape=select_autoescape()
@ -408,12 +410,33 @@ def publish(args):
    con.close()
-argparser = argparse.ArgumentParser(prog='dlibrary')
+argparser = argparse.ArgumentParser(
    prog='dlibrary',
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=textwrap.dedent("""\
    Organize DRM-free works purchased from DLSite into a library
    that can be viewed in a web browser.
    Intended workflow:
    - `extract` a collection of zipfiles downloaded from DLSite
      into DLibrary's data directory, giving each work its own
      subfolder.
    - `fetch` metadata and thumbnail images for extracted works
      from DLSite.
    - `collate` and/or `manual-collate` extracted works,
      producing a single sequence of image files (or symlinks
      into the extracted data, when possible) for each work.
    - Manually adjust works' `metadata` when necessary.
    - `generate` a static website providing a catalog and viewer
      for all collated works.
    """),
 )
 argparser.add_argument(
    '-d', '--destdir',
    type=Path,
-    default=Path('./dlibrary'),
+    default=Path(getenv('DLIBRARY_DIR', './dlibrary')),
-    help='directory to store dlibrary content and metadata to (default: ./dlibrary)',
+    help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
 )
 subparsers = argparser.add_subparsers(title="subcommands", required=True)
@ -435,23 +458,79 @@ parser_extract.set_defaults(func=extract)
 parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnails')
 parser_fetch.set_defaults(func=fetch)
-parser_collate = subparsers.add_parser('collate', help='collate a single sequence of image files for each work')
+parser_collate = subparsers.add_parser(
    'collate',
    help='collate each work into a sequence of image files',
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=textwrap.dedent("""\
    For each extracted work that has not already been collated,
    DLibrary will attempt to intuit its structure as follows:
    - Enter the work's directory. If the directory contains
      nothing except a single subdirectory (ignoring a few types
      of files that are definitely not relevant), traverse
      downwards repeatedly.
    - If the current directory contains nothing except a single
      PDF (again, ignoring irrelevant files), attempt to extract
      a series of images from the PDF. This process expects that
      each page of the PDF consists of a single embedded image,
      which will be extracted at full resolution. Support for
      more complex PDFs is not yet implemented.
    - If the current directory contains nothing except image
      files, and the image files are named in a way that clearly
      indicates a complete numerical order (each filename
      consists of a shared prefix followed by a distinct
      number), symlink files in the inferred order.
    - Otherwise, skip processing this work for now.
    DLibrary can be given "collation hints" which provide
    alternative starting points for this search process. A hint
    is a path under $DLIBRARY_DIR/extract/[work id]/
    indicating a different directory or PDF file to begin the
    search process for that work, rather than starting at the
    top level of the extracted data. There can be at most one
    hint per work; for more complicated scenarios where a work
    includes multiple folders that need to be collated together,
    or where filenames do not clearly indicate an ordering, use
    `manual-collate` instead.
    """),
 )
 parser_collate.add_argument(
    'hints',
    metavar='PATH',
    type=Path,
    nargs='*',
-    help='manually-specified paths of subdirectories or PDFs within extraction folders, at most one per work',
+    help='paths within extraction folders as collation hints'
 )
 parser_collate.set_defaults(func=collate)
-parser_manual_collate = subparsers.add_parser('manual-collate', help='collate a specific work manually, specifying all paths to include')
+parser_manual_collate = subparsers.add_parser(
    'manual-collate',
    help='collate a single work manually',
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=textwrap.dedent("""\
    All provided paths must be under $DLIBRARY_DIR/extract/[work id]/
    for the work being manually collated. `manual-collate` can
    only handle one work at a time. Paths are used as follows:
    - If a path is a directory, all *image files* immediately
      inside that directory will be appended to the sequence. If
      files are named in a way which indicates a clear ordering,
      that ordering will be used. Otherwise, filenames will be
      sorted lexicographically. Non-image files and
      subdirectories will be ignored.
    - If a path is an image file, that image file will be
      appended to the sequence.
    - If a path is a PDF file, page images will be extracted
      from that PDF and appended to the sequence.
 """),
 )
 parser_manual_collate.add_argument(
    'paths',
    metavar='PATH',
    type=Path,
    nargs='+',
-    help='paths of files (images to symlink, pdfs to extract) or directories (symlink all images in the directory, no recursion, best-effort sorting)'
+    help='paths within a single work to be collated in sequence',
 )
 parser_manual_collate.set_defaults(func=manual_collate)
@ -464,8 +543,20 @@ parser_metadata.add_argument(
 )
 parser_metadata.set_defaults(func=metadata)
-parser_publish = subparsers.add_parser('publish', help='generate HTML/CSS/JS for library site')
+parser_generate = subparsers.add_parser(
-parser_publish.set_defaults(func=publish)
+    'generate',
    help='generate HTML/CSS/JS for library site',
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=textwrap.dedent("""\
    The static site will be generated under $DLIBRARY_DIR/site/
    and can be served by pointing an HTTP server at that
    directory. Note that some files inside the static site
    hierarchy will be symlinks into $DLIBRARY_DIR/extract/
    outside the site hierarchy, so make sure your HTTP server
    will allow those symlinks to be read.
    """),
 )
 parser_generate.set_defaults(func=generate)
 def main():
    args = argparser.parse_args()