From 8deac7103c83598c4d24f5c0289c3f36e2e97ba4 Mon Sep 17 00:00:00 2001
From: xenofem <xenofem@xeno.science>
Date: Fri, 15 Mar 2024 16:50:56 -0400
Subject: [PATCH] add debug mode

---
 dlibrary/dlibrary.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index 3501161..4092dae 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -88,6 +88,11 @@ IRRELEVANT_PDF_BLOCK_REGEX = re.compile(r'\bTCPDF\b', re.I)
 MULTIPART_RAR_HEAD_REGEX = re.compile(r'^(.+)\.part0*1\.exe$', re.I)
 MULTIPART_RAR_TAIL_REGEX = re.compile(r'^(.+)\.part0*([^1]|[^0].+)\.rar$', re.I)
 
+debug_mode = False  # module-level switch; main() sets it from the -D/--debug flag
+def debug(s):  # print s to stdout while debug_mode is truthy, otherwise do nothing
+    if debug_mode:
+        print(s)
+
 def open_zipfile_with_encoding(path):
     for enc in ["utf-8", "shift-jis", "shift-jisx0213"]:
         try:
@@ -720,6 +725,8 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
     if len(entries) == 1 and not NUMBER_REGEX.search(nname(entries[0])):
         return {None: entries}
 
+    debug(f'Finding unique hierarchical prefix ordering from start point {start_point} for {entries}')
+
     longest_entry = max(entries, key=lambda e: len(nname(e)))
     matches = reversed(list(NUMBER_REGEX.finditer(nname(longest_entry))))
     for m in matches:
@@ -727,6 +734,7 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
         if pos < start_point:
             return None
         prefix = nname(longest_entry)[:pos]
+        debug(f'Checking prefix {prefix}')
         if all(nname(e).startswith(prefix) or prefix.startswith(nfc(e.stem)) for e in entries):
             numbering = {}
             for e in entries:
@@ -743,6 +751,7 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
             for idx in indices:
                 if len(numbering[idx]) > 1:
                     ents_idx = numbering.pop(idx)
+                    debug(f'Index {idx} has multiple entries')
                     longest = max(ents_idx, key=lambda e: len(nname(e)))
                     next_layer_start = pos + NUMBER_REGEX.match(nname(longest)[pos:]).end()
                     sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start) or alphabetic_numbering(ents_idx, next_layer_start)
@@ -756,21 +765,27 @@ def unique_hierarchical_prefix_numbering(entries, start_point=0):
     return None
 
 def alphabetic_numbering(entries, start_point):
+    debug(f'Finding alphabetic numbering from start point {start_point} for {entries}')  # trace the inputs before any early return
     alphabetized = {}
     for entry in entries:
         ending = nfc(entry.stem)[start_point:].strip(' -_()')
+        debug(f'{entry} has ending {ending}')  # ending: nfc(stem) from start_point on, stripped of ' -_()'
         if len(ending) > 1:
+            debug('Ending is more than one character, giving up')  # only an empty or one-character ending is accepted
             return None
         index = 0 if ending == '' else ord(ending.lower()) - ord('a') + 1
         if index < 0 or index > 26:
+            debug('Ending is not a letter, giving up')  # NOTE(review): '`' yields index 0 and passes the range check above, like an empty ending
             return None
         if (index,) in alphabetized:
+            debug(f'Index value {index} is already present, giving up')  # a previous entry already produced this index
             return None
         alphabetized[(index,)] = [entry]
     indices = list(alphabetized.keys())
     indices.sort()
     for i in range(1, len(indices)):
         if indices[i][0] - indices[i-1][0] != 1:
+            debug(f'Adjacent indices {indices[i][0]} and {indices[i-1][0]} are not consecutive, giving up')  # prints the later index first, then its predecessor
             return None
     return alphabetized
 
@@ -1128,6 +1143,11 @@ argparser.add_argument(
     default=Path(os.getenv('DLIBRARY_DIR', './dlibrary')),
     help='directory to store dlibrary content and metadata to (default: $DLIBRARY_DIR or ./dlibrary)',
 )
+argparser.add_argument(
+    '-D', '--debug',
+    action='store_true',  # args.debug is False unless the flag is given
+    help='print out debugging info',
+)
 argparser.add_argument(
     '-l', '--locale',
     type=str,
@@ -1277,6 +1297,10 @@ parser_generate.set_defaults(func=generate)
 
 def main():
     args = argparser.parse_args()
+
+    global debug_mode  # rebind the module-level flag that debug() checks
+    debug_mode = args.debug  # set before args.func(args) runs, so debug() calls made from it see the flag
+
     args.func(args)
 
 if __name__ == "__main__":