Add more detailed help and allow configuring destdir with an environment variable
This commit is contained in:
parent
fb7d275ebb
commit
1f15abed9a
|
@ -4,10 +4,12 @@ import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import importlib_resources as resources
|
import importlib_resources as resources
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from os import getenv
|
||||||
from os.path import relpath, splitext
|
from os.path import relpath, splitext
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import textwrap
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
import zipfile
|
import zipfile
|
||||||
|
|
||||||
|
@ -301,7 +303,7 @@ def copy_contents(src, dest):
|
||||||
for item in src.iterdir():
|
for item in src.iterdir():
|
||||||
shutil.copyfile(item, dest / item.name)
|
shutil.copyfile(item, dest / item.name)
|
||||||
|
|
||||||
def publish(args):
|
def generate(args):
|
||||||
jenv = Environment(
|
jenv = Environment(
|
||||||
loader=PackageLoader("dlibrary"),
|
loader=PackageLoader("dlibrary"),
|
||||||
autoescape=select_autoescape()
|
autoescape=select_autoescape()
|
||||||
|
@ -408,12 +410,33 @@ def publish(args):
|
||||||
con.close()
|
con.close()
|
||||||
|
|
||||||
|
|
||||||
argparser = argparse.ArgumentParser(prog='dlibrary')
|
argparser = argparse.ArgumentParser(
|
||||||
|
prog='dlibrary',
|
||||||
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
|
description=textwrap.dedent("""\
|
||||||
|
Organize DRM-free works purchased from DLSite into a library
|
||||||
|
that can be viewed in a web browser.
|
||||||
|
|
||||||
|
Intended workflow:
|
||||||
|
- `extract` a collection of zipfiles downloaded from DLSite
|
||||||
|
into DLibrary's data directory, giving each work its own
|
||||||
|
subfolder.
|
||||||
|
- `fetch` metadata and thumbnail images for extracted works
|
||||||
|
from DLSite.
|
||||||
|
- `collate` and/or `manual-collate` extracted works,
|
||||||
|
producing a single sequence of image files (or symlinks
|
||||||
|
into the extracted data, when possible) for each work.
|
||||||
|
- Manually adjust works' `metadata` when necessary.
|
||||||
|
- `generate` a static website providing a catalog and viewer
|
||||||
|
for all collated works.
|
||||||
|
"""),
|
||||||
|
)
|
||||||
|
|
||||||
argparser.add_argument(
|
argparser.add_argument(
|
||||||
'-d', '--destdir',
|
'-d', '--destdir',
|
||||||
type=Path,
|
type=Path,
|
||||||
default=Path('./dlibrary'),
|
default=Path(getenv('DLIBRARY_DIR', './dlibrary')),
|
||||||
help='directory to store dlibrary content and metadata to (default: ./dlibrary)',
|
help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
|
||||||
)
|
)
|
||||||
subparsers = argparser.add_subparsers(title="subcommands", required=True)
|
subparsers = argparser.add_subparsers(title="subcommands", required=True)
|
||||||
|
|
||||||
|
@ -435,23 +458,79 @@ parser_extract.set_defaults(func=extract)
|
||||||
parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnails')
|
parser_fetch = subparsers.add_parser('fetch', help='fetch metadata and thumbnails')
|
||||||
parser_fetch.set_defaults(func=fetch)
|
parser_fetch.set_defaults(func=fetch)
|
||||||
|
|
||||||
parser_collate = subparsers.add_parser('collate', help='collate a single sequence of image files for each work')
|
parser_collate = subparsers.add_parser(
|
||||||
|
'collate',
|
||||||
|
help='collate each work into a sequence of image files',
|
||||||
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
|
description=textwrap.dedent("""\
|
||||||
|
For each extracted work that has not already been collated,
|
||||||
|
DLibrary will attempt to intuit its structure as follows:
|
||||||
|
|
||||||
|
- Enter the work's directory. If the directory contains
|
||||||
|
nothing except a single subdirectory (ignoring a few types
|
||||||
|
of files that are definitely not relevant), traverse
|
||||||
|
downwards repeatedly.
|
||||||
|
- If the current directory contains nothing except a single
|
||||||
|
PDF (again, ignoring irrelevant files), attempt to extract
|
||||||
|
a series of images from the PDF. This process expects that
|
||||||
|
each page of the PDF consists of a single embedded image,
|
||||||
|
which will be extracted at full resolution. Support for
|
||||||
|
more complex PDFs is not yet implemented.
|
||||||
|
- If the current directory contains nothing except image
|
||||||
|
files, and the image files are named in a way that clearly
|
||||||
|
indicates a complete numerical order (each filename
|
||||||
|
consists of a shared prefix followed by a distinct
|
||||||
|
number), symlink files in the inferred order.
|
||||||
|
- Otherwise, skip processing this work for now.
|
||||||
|
|
||||||
|
DLibrary can be given "collation hints" which provide
|
||||||
|
alternative starting points for this search process. A hint
|
||||||
|
is a path under $DLIBRARY_DIR/extract/[work id]/
|
||||||
|
indicating a different directory or PDF file to begin the
|
||||||
|
search process for that work, rather than starting at the
|
||||||
|
top level of the extracted data. There can be at most one
|
||||||
|
hint per work; for more complicated scenarios where a work
|
||||||
|
includes multiple folders that need to be collated together,
|
||||||
|
or where filenames do not clearly indicate an ordering, use
|
||||||
|
`manual-collate` instead.
|
||||||
|
"""),
|
||||||
|
)
|
||||||
parser_collate.add_argument(
|
parser_collate.add_argument(
|
||||||
'hints',
|
'hints',
|
||||||
metavar='PATH',
|
metavar='PATH',
|
||||||
type=Path,
|
type=Path,
|
||||||
nargs='*',
|
nargs='*',
|
||||||
help='manually-specified paths of subdirectories or PDFs within extraction folders, at most one per work',
|
help='paths within extraction folders as collation hints'
|
||||||
)
|
)
|
||||||
parser_collate.set_defaults(func=collate)
|
parser_collate.set_defaults(func=collate)
|
||||||
|
|
||||||
parser_manual_collate = subparsers.add_parser('manual-collate', help='collate a specific work manually, specifying all paths to include')
|
parser_manual_collate = subparsers.add_parser(
|
||||||
|
'manual-collate',
|
||||||
|
help='collate a single work manually',
|
||||||
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
|
description=textwrap.dedent("""\
|
||||||
|
All provided paths must be under $DLIBRARY_DIR/extract/[work id]/
|
||||||
|
for the work being manually collated. `manual-collate` can
|
||||||
|
only handle one work at a time. Paths are used as follows:
|
||||||
|
|
||||||
|
- If a path is a directory, all *image files* immediately
|
||||||
|
inside that directory will be appended to the sequence. If
|
||||||
|
files are named in a way which indicates a clear ordering,
|
||||||
|
that ordering will be used. Otherwise, filenames will be
|
||||||
|
sorted lexicographically. Non-image files and
|
||||||
|
subdirectories will be ignored.
|
||||||
|
- If a path is an image file, that image file will be
|
||||||
|
appended to the sequence.
|
||||||
|
- If a path is a PDF file, page images will be extracted
|
||||||
|
from that PDF and appended to the sequence.
|
||||||
|
"""),
|
||||||
|
)
|
||||||
parser_manual_collate.add_argument(
|
parser_manual_collate.add_argument(
|
||||||
'paths',
|
'paths',
|
||||||
metavar='PATH',
|
metavar='PATH',
|
||||||
type=Path,
|
type=Path,
|
||||||
nargs='+',
|
nargs='+',
|
||||||
help='paths of files (images to symlink, pdfs to extract) or directories (symlink all images in the directory, no recursion, best-effort sorting)'
|
help='paths within a single work to be collated in sequence',
|
||||||
)
|
)
|
||||||
parser_manual_collate.set_defaults(func=manual_collate)
|
parser_manual_collate.set_defaults(func=manual_collate)
|
||||||
|
|
||||||
|
@ -464,8 +543,20 @@ parser_metadata.add_argument(
|
||||||
)
|
)
|
||||||
parser_metadata.set_defaults(func=metadata)
|
parser_metadata.set_defaults(func=metadata)
|
||||||
|
|
||||||
parser_publish = subparsers.add_parser('publish', help='generate HTML/CSS/JS for library site')
|
parser_generate = subparsers.add_parser(
|
||||||
parser_publish.set_defaults(func=publish)
|
'generate',
|
||||||
|
help='generate HTML/CSS/JS for library site',
|
||||||
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
|
description=textwrap.dedent("""\
|
||||||
|
The static site will be generated under $DLIBRARY_DIR/site/
|
||||||
|
and can be served by pointing an HTTP server at that
|
||||||
|
directory. Note that some files inside the static site
|
||||||
|
hierarchy will be symlinks into $DLIBRARY_DIR/extract/
|
||||||
|
outside the site hierarchy, so make sure your HTTP server
|
||||||
|
will allow those symlinks to be read.
|
||||||
|
"""),
|
||||||
|
)
|
||||||
|
parser_generate.set_defaults(func=generate)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
args = argparser.parse_args()
|
args = argparser.parse_args()
|
||||||
|
|
Loading…
Reference in a new issue