Compare commits
No commits in common. "5378899b2e375bb7fd630855a163a8b86a11e399" and "9ab587d39941ef3b9a837076751999d50f131091" have entirely different histories.
5378899b2e
...
9ab587d399
|
@ -88,11 +88,6 @@ IRRELEVANT_PDF_BLOCK_REGEX = re.compile(r'\bTCPDF\b', re.I)
|
||||||
MULTIPART_RAR_HEAD_REGEX = re.compile(r'^(.+)\.part0*1\.exe$', re.I)
|
MULTIPART_RAR_HEAD_REGEX = re.compile(r'^(.+)\.part0*1\.exe$', re.I)
|
||||||
MULTIPART_RAR_TAIL_REGEX = re.compile(r'^(.+)\.part0*([^1]|[^0].+)\.rar$', re.I)
|
MULTIPART_RAR_TAIL_REGEX = re.compile(r'^(.+)\.part0*([^1]|[^0].+)\.rar$', re.I)
|
||||||
|
|
||||||
debug_mode = False
|
|
||||||
def debug(s):
|
|
||||||
if debug_mode:
|
|
||||||
print(s)
|
|
||||||
|
|
||||||
def open_zipfile_with_encoding(path):
|
def open_zipfile_with_encoding(path):
|
||||||
for enc in ["utf-8", "shift-jis", "shift-jisx0213"]:
|
for enc in ["utf-8", "shift-jis", "shift-jisx0213"]:
|
||||||
try:
|
try:
|
||||||
|
@ -725,8 +720,6 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
|
||||||
if len(entries) == 1 and not NUMBER_REGEX.search(nname(entries[0])):
|
if len(entries) == 1 and not NUMBER_REGEX.search(nname(entries[0])):
|
||||||
return {None: entries}
|
return {None: entries}
|
||||||
|
|
||||||
debug(f'Finding unique hierarchical prefix ordering from start point {start_point} for {entries}')
|
|
||||||
|
|
||||||
longest_entry = max(entries, key=lambda e: len(nname(e)))
|
longest_entry = max(entries, key=lambda e: len(nname(e)))
|
||||||
matches = reversed(list(NUMBER_REGEX.finditer(nname(longest_entry))))
|
matches = reversed(list(NUMBER_REGEX.finditer(nname(longest_entry))))
|
||||||
for m in matches:
|
for m in matches:
|
||||||
|
@ -734,7 +727,6 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
|
||||||
if pos < start_point:
|
if pos < start_point:
|
||||||
return None
|
return None
|
||||||
prefix = nname(longest_entry)[:pos]
|
prefix = nname(longest_entry)[:pos]
|
||||||
debug(f'Checking prefix {prefix}')
|
|
||||||
if all(nname(e).startswith(prefix) or prefix.startswith(nfc(e.stem)) for e in entries):
|
if all(nname(e).startswith(prefix) or prefix.startswith(nfc(e.stem)) for e in entries):
|
||||||
numbering = {}
|
numbering = {}
|
||||||
for e in entries:
|
for e in entries:
|
||||||
|
@ -751,7 +743,6 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
|
||||||
for idx in indices:
|
for idx in indices:
|
||||||
if len(numbering[idx]) > 1:
|
if len(numbering[idx]) > 1:
|
||||||
ents_idx = numbering.pop(idx)
|
ents_idx = numbering.pop(idx)
|
||||||
debug(f'Index {idx} has multiple entries')
|
|
||||||
longest = max(ents_idx, key=lambda e: len(nname(e)))
|
longest = max(ents_idx, key=lambda e: len(nname(e)))
|
||||||
next_layer_start = pos + NUMBER_REGEX.match(nname(longest)[pos:]).end()
|
next_layer_start = pos + NUMBER_REGEX.match(nname(longest)[pos:]).end()
|
||||||
sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start) or alphabetic_numbering(ents_idx, next_layer_start)
|
sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start) or alphabetic_numbering(ents_idx, next_layer_start)
|
||||||
|
@ -765,22 +756,22 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def alphabetic_numbering(entries, start_point):
|
def alphabetic_numbering(entries, start_point):
|
||||||
debug(f'Finding alphabetic numbering from start point {start_point} for {entries}')
|
|
||||||
alphabetized = {}
|
alphabetized = {}
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
ending = nfc(entry.stem)[start_point:].strip(' -_()')
|
ending = nfc(entry.stem)[start_point:].strip(' -_()')
|
||||||
debug(f'{entry} has ending {ending}')
|
|
||||||
if len(ending) > 1:
|
if len(ending) > 1:
|
||||||
debug('Ending is more than one character, giving up')
|
|
||||||
return None
|
return None
|
||||||
index = 0 if ending == '' else ord(ending.lower()) - ord('a') + 1
|
index = 0 if ending == '' else ord(ending.lower()) - ord('a') + 1
|
||||||
if index < 0 or index > 26:
|
if index < 0 or index > 26:
|
||||||
debug('Ending is not a letter, giving up')
|
|
||||||
return None
|
return None
|
||||||
if (index,) in alphabetized:
|
if (index,) in alphabetized:
|
||||||
debug(f'Index value {index} is already present, giving up')
|
|
||||||
return None
|
return None
|
||||||
alphabetized[(index,)] = [entry]
|
alphabetized[(index,)] = [entry]
|
||||||
|
indices = list(alphabetized.keys())
|
||||||
|
indices.sort()
|
||||||
|
for i in range(1, len(indices)):
|
||||||
|
if indices[i][0] - indices[i-1][0] != 1:
|
||||||
|
return None
|
||||||
return alphabetized
|
return alphabetized
|
||||||
|
|
||||||
def check_extension(path, exts):
|
def check_extension(path, exts):
|
||||||
|
@ -1137,11 +1128,6 @@ argparser.add_argument(
|
||||||
default=Path(os.getenv('DLIBRARY_DIR', './dlibrary')),
|
default=Path(os.getenv('DLIBRARY_DIR', './dlibrary')),
|
||||||
help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
|
help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
|
||||||
)
|
)
|
||||||
argparser.add_argument(
|
|
||||||
'-D', '--debug',
|
|
||||||
action='store_true',
|
|
||||||
help='print out debugging info',
|
|
||||||
)
|
|
||||||
argparser.add_argument(
|
argparser.add_argument(
|
||||||
'-l', '--locale',
|
'-l', '--locale',
|
||||||
type=str,
|
type=str,
|
||||||
|
@ -1291,10 +1277,6 @@ parser_generate.set_defaults(func=generate)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
args = argparser.parse_args()
|
args = argparser.parse_args()
|
||||||
|
|
||||||
global debug_mode
|
|
||||||
debug_mode = args.debug
|
|
||||||
|
|
||||||
args.func(args)
|
args.func(args)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Loading…
Reference in a new issue