Compare commits

...

3 commits

View file

@ -88,6 +88,11 @@ IRRELEVANT_PDF_BLOCK_REGEX = re.compile(r'\bTCPDF\b', re.I)
MULTIPART_RAR_HEAD_REGEX = re.compile(r'^(.+)\.part0*1\.exe$', re.I) MULTIPART_RAR_HEAD_REGEX = re.compile(r'^(.+)\.part0*1\.exe$', re.I)
MULTIPART_RAR_TAIL_REGEX = re.compile(r'^(.+)\.part0*([^1]|[^0].+)\.rar$', re.I) MULTIPART_RAR_TAIL_REGEX = re.compile(r'^(.+)\.part0*([^1]|[^0].+)\.rar$', re.I)
# Module-wide switch for diagnostic output; debug() reads it on every call.
debug_mode = False


def debug(s):
    """Print ``s`` to standard output, but only while ``debug_mode`` is true.

    ``s`` is handed straight to ``print``, so any printable object works.
    The flag is looked up at call time, so rebinding the module-level
    ``debug_mode`` takes effect on the next call.
    """
    if debug_mode:
        print(s)
def open_zipfile_with_encoding(path): def open_zipfile_with_encoding(path):
for enc in ["utf-8", "shift-jis", "shift-jisx0213"]: for enc in ["utf-8", "shift-jis", "shift-jisx0213"]:
try: try:
@ -720,6 +725,8 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
if len(entries) == 1 and not NUMBER_REGEX.search(nname(entries[0])): if len(entries) == 1 and not NUMBER_REGEX.search(nname(entries[0])):
return {None: entries} return {None: entries}
debug(f'Finding unique hierarchical prefix ordering from start point {start_point} for {entries}')
longest_entry = max(entries, key=lambda e: len(nname(e))) longest_entry = max(entries, key=lambda e: len(nname(e)))
matches = reversed(list(NUMBER_REGEX.finditer(nname(longest_entry)))) matches = reversed(list(NUMBER_REGEX.finditer(nname(longest_entry))))
for m in matches: for m in matches:
@ -727,6 +734,7 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
if pos < start_point: if pos < start_point:
return None return None
prefix = nname(longest_entry)[:pos] prefix = nname(longest_entry)[:pos]
debug(f'Checking prefix {prefix}')
if all(nname(e).startswith(prefix) or prefix.startswith(nfc(e.stem)) for e in entries): if all(nname(e).startswith(prefix) or prefix.startswith(nfc(e.stem)) for e in entries):
numbering = {} numbering = {}
for e in entries: for e in entries:
@ -743,6 +751,7 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
for idx in indices: for idx in indices:
if len(numbering[idx]) > 1: if len(numbering[idx]) > 1:
ents_idx = numbering.pop(idx) ents_idx = numbering.pop(idx)
debug(f'Index {idx} has multiple entries')
longest = max(ents_idx, key=lambda e: len(nname(e))) longest = max(ents_idx, key=lambda e: len(nname(e)))
next_layer_start = pos + NUMBER_REGEX.match(nname(longest)[pos:]).end() next_layer_start = pos + NUMBER_REGEX.match(nname(longest)[pos:]).end()
sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start) or alphabetic_numbering(ents_idx, next_layer_start) sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start) or alphabetic_numbering(ents_idx, next_layer_start)
@ -756,22 +765,22 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
return None return None
def alphabetic_numbering(entries, start_point):
    """Index entries by the single letter left in their stem after ``start_point``.

    For each entry, ``nfc(entry.stem)`` is sliced from ``start_point`` and
    stripped of spaces, hyphens, underscores and parentheses.  An empty
    remainder gets index 0; a single letter a-z (case-insensitive) gets
    1-26.

    Returns a dict mapping a one-element tuple ``(index,)`` to a one-element
    list holding the entry.  Returns None as soon as any entry has a
    remainder longer than one character, a remainder that is not a letter
    a-z, or an index that another entry already claimed.
    """
    debug(f'Finding alphabetic numbering from start point {start_point} for {entries}')
    alphabetized = {}
    for entry in entries:
        ending = nfc(entry.stem)[start_point:].strip(' -_()')
        debug(f'{entry} has ending {ending}')
        if len(ending) > 1:
            debug('Ending is more than one character, giving up')
            return None
        if ending == '':
            index = 0
        else:
            letter = ending.lower()
            # Compare the string itself rather than calling ord() first:
            # lowercasing can expand to two characters (e.g. U+0130), which
            # makes ord() raise TypeError, and a character just below 'a'
            # such as '`' would otherwise compute to 0 and collide with the
            # slot reserved for the empty ending.
            if len(letter) != 1 or not 'a' <= letter <= 'z':
                debug('Ending is not a letter, giving up')
                return None
            index = ord(letter) - ord('a') + 1
        if (index,) in alphabetized:
            debug(f'Index value {index} is already present, giving up')
            return None
        alphabetized[(index,)] = [entry]
    return alphabetized
def check_extension(path, exts): def check_extension(path, exts):
@ -1128,6 +1137,11 @@ argparser.add_argument(
default=Path(os.getenv('DLIBRARY_DIR', './dlibrary')), default=Path(os.getenv('DLIBRARY_DIR', './dlibrary')),
help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)', help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
) )
# Boolean switch, off unless given; main() copies it into the module-level
# debug_mode flag that debug() checks before printing.
argparser.add_argument(
    '-D', '--debug',
    action='store_true',
    help='print out debugging info',
)
argparser.add_argument( argparser.add_argument(
'-l', '--locale', '-l', '--locale',
type=str, type=str,
@ -1277,6 +1291,10 @@ parser_generate.set_defaults(func=generate)
def main():
    """Parse the command line, apply ``--debug``, and run the chosen handler.

    The parsed ``debug`` flag is stored in the module-level ``debug_mode``
    before dispatch, so the handler sees it.  The handler is whatever
    callable the parser stored in ``args.func``; it receives the full
    parsed namespace.
    """
    global debug_mode

    args = argparser.parse_args()
    debug_mode = args.debug
    args.func(args)
if __name__ == "__main__": if __name__ == "__main__":