add more detailed help and allow configuring destdir via the DLIBRARY_DIR environment variable
This commit is contained in:
parent
fb7d275ebb
commit
1f15abed9a
|
@ -4,10 +4,12 @@ import argparse
|
|||
import asyncio
|
||||
import importlib_resources as resources
|
||||
from pathlib import Path
|
||||
from os import getenv
|
||||
from os.path import relpath, splitext
|
||||
import re
|
||||
import shutil
|
||||
import sqlite3
|
||||
import textwrap
|
||||
from urllib.parse import urlparse
|
||||
import zipfile
|
||||
|
||||
|
@ -301,7 +303,7 @@ def copy_contents(src, dest):
|
|||
for item in src.iterdir():
|
||||
shutil.copyfile(item, dest / item.name)
|
||||
|
||||
def publish(args):
|
||||
def generate(args):
|
||||
jenv = Environment(
|
||||
loader=PackageLoader("dlibrary"),
|
||||
autoescape=select_autoescape()
|
||||
|
@ -408,12 +410,33 @@ def publish(args):
|
|||
con.close()
|
||||
|
||||
|
||||
argparser = argparse.ArgumentParser(prog='dlibrary')
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog='dlibrary',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
description=textwrap.dedent("""\
|
||||
Organize DRM-free works purchased from DLSite into a library
|
||||
that can be viewed in a web browser.
|
||||
|
||||
Intended workflow:
|
||||
- `extract` a collection of zipfiles downloaded from DLSite
|
||||
into DLibrary's data directory, giving each work its own
|
||||
subfolder.
|
||||
- `fetch` metadata and thumbnail images for extracted works
|
||||
from DLSite.
|
||||
- `collate` and/or `manual-collate` extracted works,
|
||||
producing a single sequence of image files (or symlinks
|
||||
into the extracted data, when possible) for each work.
|
||||
- Manually adjust works' `metadata` when necessary.
|
||||
- `generate` a static website providing a catalog and viewer
|
||||
for all collated works.
|
||||
"""),
|
||||
)
|
||||
|
||||
argparser.add_argument(
|
||||
'-d', '--destdir',
|
||||
type=Path,
|
||||
default=Path('./dlibrary'),
|
||||
help='directory to store dlibrary content and metadata to (default: ./dlibrary)',
|
||||
default=Path(getenv('DLIBRARY_DIR', './dlibrary')),
|
||||
help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
|
||||
)
|
||||
subparsers = argparser.add_subparsers(title="subcommands", required=True)
|
||||
|
||||
|
@ -435,23 +458,79 @@ parser_extract.set_defaults(func=extract)
|
|||
parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnails')
|
||||
parser_fetch.set_defaults(func=fetch)
|
||||
|
||||
parser_collate = subparsers.add_parser('collate', help='collate a single sequence of image files for each work')
|
||||
parser_collate = subparsers.add_parser(
|
||||
'collate',
|
||||
help='collate each work into a sequence of image files',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
description=textwrap.dedent("""\
|
||||
For each extracted work that has not already been collated,
|
||||
DLibrary will attempt to intuit its structure as follows:
|
||||
|
||||
- Enter the work's directory. If the directory contains
|
||||
nothing except a single subdirectory (ignoring a few types
|
||||
of files that are definitely not relevant), traverse
|
||||
downwards repeatedly.
|
||||
- If the current directory contains nothing except a single
|
||||
PDF (again, ignoring irrelevant files), attempt to extract
|
||||
a series of images from the PDF. This process expects that
|
||||
each page of the PDF consists of a single embedded image,
|
||||
which will be extracted at full resolution. Support for
|
||||
more complex PDFs is not yet implemented.
|
||||
- If the current directory contains nothing except image
|
||||
files, and the image files are named in a way that clearly
|
||||
indicates a complete numerical order (each filename
|
||||
consists of a shared prefix followed by a distinct
|
||||
number), symlink files in the inferred order.
|
||||
- Otherwise, skip processing this work for now.
|
||||
|
||||
DLibrary can be given "collation hints" which provide
|
||||
alternative starting points for this search process. A hint
|
||||
is a path under $DLIBRARY_DIR/extract/[work id]/
|
||||
indicating a different directory or PDF file to begin the
|
||||
search process for that work, rather than starting at the
|
||||
top level of the extracted data. There can be at most one
|
||||
hint per work; for more complicated scenarios where a work
|
||||
includes multiple folders that need to be collated together,
|
||||
or where filenames do not clearly indicate an ordering, use
|
||||
`manual-collate` instead.
|
||||
"""),
|
||||
)
|
||||
parser_collate.add_argument(
|
||||
'hints',
|
||||
metavar='PATH',
|
||||
type=Path,
|
||||
nargs='*',
|
||||
help='manually-specified paths of subdirectories or PDFs within extraction folders, at most one per work',
|
||||
help='paths within extraction folders as collation hints'
|
||||
)
|
||||
parser_collate.set_defaults(func=collate)
|
||||
|
||||
parser_manual_collate = subparsers.add_parser('manual-collate', help='collate a specific work manually, specifying all paths to include')
|
||||
parser_manual_collate = subparsers.add_parser(
|
||||
'manual-collate',
|
||||
help='collate a single work manually',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
description=textwrap.dedent("""\
|
||||
All provided paths must be under $DLIBRARY_DIR/extract/[work id]/
|
||||
for the work being manually collated. `manual-collate` can
|
||||
only handle one work at a time. Paths are used as follows:
|
||||
|
||||
- If a path is a directory, all *image files* immediately
|
||||
inside that directory will be appended to the sequence. If
|
||||
files are named in a way which indicates a clear ordering,
|
||||
that ordering will be used. Otherwise, filenames will be
|
||||
sorted lexicographically. Non-image files and
|
||||
subdirectories will be ignored.
|
||||
- If a path is an image file, that image file will be
|
||||
appended to the sequence.
|
||||
- If a path is a PDF file, page images will be extracted
|
||||
from that PDF and appended to the sequence.
|
||||
"""),
|
||||
)
|
||||
parser_manual_collate.add_argument(
|
||||
'paths',
|
||||
metavar='PATH',
|
||||
type=Path,
|
||||
nargs='+',
|
||||
help='paths of files (images to symlink, pdfs to extract) or directories (symlink all images in the directory, no recursion, best-effort sorting)'
|
||||
help='paths within a single work to be collated in sequence',
|
||||
)
|
||||
parser_manual_collate.set_defaults(func=manual_collate)
|
||||
|
||||
|
@ -464,8 +543,20 @@ parser_metadata.add_argument(
|
|||
)
|
||||
parser_metadata.set_defaults(func=metadata)
|
||||
|
||||
parser_publish = subparsers.add_parser('publish', help='generate HTML/CSS/JS for library site')
|
||||
parser_publish.set_defaults(func=publish)
|
||||
parser_generate = subparsers.add_parser(
|
||||
'generate',
|
||||
help='generate HTML/CSS/JS for library site',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
description=textwrap.dedent("""\
|
||||
The static site will be generated under $DLIBRARY_DIR/site/
|
||||
and can be served by pointing an HTTP server at that
|
||||
directory. Note that some files inside the static site
|
||||
hierarchy will be symlinks into $DLIBRARY_DIR/extract/
|
||||
outside the site hierarchy, so make sure your HTTP server
|
||||
will allow those symlinks to be read.
|
||||
"""),
|
||||
)
|
||||
parser_generate.set_defaults(func=generate)
|
||||
|
||||
def main():
|
||||
args = argparser.parse_args()
|
||||
|
|
Loading…
Reference in a new issue