add more detailed help and allow configuring destdir with environment variable

This commit is contained in:
xenofem 2024-01-23 15:54:17 -05:00
parent fb7d275ebb
commit 1f15abed9a

View file

@@ -4,10 +4,12 @@ import argparse
import asyncio import asyncio
import importlib_resources as resources import importlib_resources as resources
from pathlib import Path from pathlib import Path
from os import getenv
from os.path import relpath, splitext from os.path import relpath, splitext
import re import re
import shutil import shutil
import sqlite3 import sqlite3
import textwrap
from urllib.parse import urlparse from urllib.parse import urlparse
import zipfile import zipfile
@@ -301,7 +303,7 @@ def copy_contents(src, dest):
for item in src.iterdir(): for item in src.iterdir():
shutil.copyfile(item, dest / item.name) shutil.copyfile(item, dest / item.name)
def publish(args): def generate(args):
jenv = Environment( jenv = Environment(
loader=PackageLoader("dlibrary"), loader=PackageLoader("dlibrary"),
autoescape=select_autoescape() autoescape=select_autoescape()
@@ -408,12 +410,33 @@ def publish(args):
con.close() con.close()
argparser = argparse.ArgumentParser(prog='dlibrary') argparser = argparse.ArgumentParser(
prog='dlibrary',
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent("""\
Organize DRM-free works purchased from DLSite into a library
that can be viewed in a web browser.
Intended workflow:
- `extract` a collection of zipfiles downloaded from DLSite
into DLibrary's data directory, giving each work its own
subfolder.
- `fetch` metadata and thumbnail images for extracted works
from DLSite.
- `collate` and/or `manual-collate` extracted works,
producing a single sequence of image files (or symlinks
into the extracted data, when possible) for each work.
- Manually adjust works' `metadata` when necessary.
- `generate` a static website providing a catalog and viewer
for all collated works.
"""),
)
argparser.add_argument( argparser.add_argument(
'-d', '--destdir', '-d', '--destdir',
type=Path, type=Path,
default=Path('./dlibrary'), default=Path(getenv('DLIBRARY_DIR', './dlibrary')),
help='directory to store dlibrary content and metadata to (default: ./dlibrary)', help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
) )
subparsers = argparser.add_subparsers(title="subcommands", required=True) subparsers = argparser.add_subparsers(title="subcommands", required=True)
@@ -435,23 +458,79 @@ parser_extract.set_defaults(func=extract)
parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnails') parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnails')
parser_fetch.set_defaults(func=fetch) parser_fetch.set_defaults(func=fetch)
parser_collate = subparsers.add_parser('collate', help='collate a single sequence of image files for each work') parser_collate = subparsers.add_parser(
'collate',
help='collate each work into a sequence of image files',
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent("""\
For each extracted work that has not already been collated,
DLibrary will attempt to intuit its structure as follows:
- Enter the work's directory. If the directory contains
nothing except a single subdirectory (ignoring a few types
of files that are definitely not relevant), traverse
downwards repeatedly.
- If the current directory contains nothing except a single
PDF (again, ignoring irrelevant files), attempt to extract
a series of images from the PDF. This process expects that
each page of the PDF consists of a single embedded image,
which will be extracted at full resolution. Support for
more complex PDFs is not yet implemented.
- If the current directory contains nothing except image
files, and the image files are named in a way that clearly
indicates a complete numerical order (each filename
consists of a shared prefix followed by a distinct
number), symlink files in the inferred order.
- Otherwise, skip processing this work for now.
DLibrary can be given "collation hints" which provide
alternative starting points for this search process. A hint
is a path under $DLIBRARY_DIR/extract/[work id]/
indicating a different directory or PDF file to begin the
search process for that work, rather than starting at the
top level of the extracted data. There can be at most one
hint per work; for more complicated scenarios where a work
includes multiple folders that need to be collated together,
or where filenames do not clearly indicate an ordering, use
`manual-collate` instead.
"""),
)
parser_collate.add_argument( parser_collate.add_argument(
'hints', 'hints',
metavar='PATH', metavar='PATH',
type=Path, type=Path,
nargs='*', nargs='*',
help='manually-specified paths of subdirectories or PDFs within extraction folders, at most one per work', help='paths within extraction folders as collation hints'
) )
parser_collate.set_defaults(func=collate) parser_collate.set_defaults(func=collate)
parser_manual_collate = subparsers.add_parser('manual-collate', help='collate a specific work manually, specifying all paths to include') parser_manual_collate = subparsers.add_parser(
'manual-collate',
help='collate a single work manually',
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent("""\
All provided paths must be under $DLIBRARY_DIR/extract/[work id]/
for the work being manually collated. `manual-collate` can
only handle one work at a time. Paths are used as follows:
- If a path is a directory, all *image files* immediately
inside that directory will be appended to the sequence. If
files are named in a way which indicates a clear ordering,
that ordering will be used. Otherwise, filenames will be
sorted lexicographically. Non-image files and
subdirectories will be ignored.
- If a path is an image file, that image file will be
appended to the sequence.
- If a path is a PDF file, page images will be extracted
from that PDF and appended to the sequence.
"""),
)
parser_manual_collate.add_argument( parser_manual_collate.add_argument(
'paths', 'paths',
metavar='PATH', metavar='PATH',
type=Path, type=Path,
nargs='+', nargs='+',
help='paths of files (images to symlink, pdfs to extract) or directories (symlink all images in the directory, no recursion, best-effort sorting)' help='paths within a single work to be collated in sequence',
) )
parser_manual_collate.set_defaults(func=manual_collate) parser_manual_collate.set_defaults(func=manual_collate)
@@ -464,8 +543,20 @@ parser_metadata.add_argument(
) )
parser_metadata.set_defaults(func=metadata) parser_metadata.set_defaults(func=metadata)
parser_publish = subparsers.add_parser('publish', help='generate HTML/CSS/JS for library site') parser_generate = subparsers.add_parser(
parser_publish.set_defaults(func=publish) 'generate',
help='generate HTML/CSS/JS for library site',
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent("""\
The static site will be generated under $DLIBRARY_DIR/site/
and can be served by pointing an HTTP server at that
directory. Note that some files inside the static site
hierarchy will be symlinks into $DLIBRARY_DIR/extract/
outside the site hierarchy, so make sure your HTTP server
will allow those symlinks to be read.
"""),
)
parser_generate.set_defaults(func=generate)
def main(): def main():
args = argparser.parse_args() args = argparser.parse_args()