handle hierarchical numbering schemes in auto-collation

This commit is contained in:
xenofem 2024-02-06 11:02:08 -05:00
parent d66b467c5c
commit aefaf824a8

View file

@ -202,7 +202,7 @@ def complete_prefix_number_ordering(entries):
version = next(ver for ver in (ALT_VERSIONS + ['']) if ver in entry.name)
entries_by_version.setdefault(version, []).append(entry)
numberings_by_version = {ver: prefix_numbering(entries_by_version[ver]) for ver in entries_by_version}
numberings_by_version = {ver: unique_hierarchical_prefix_numbering(entries_by_version[ver]) for ver in entries_by_version}
unified_indices = set()
for numbering in numberings_by_version.values():
@ -212,8 +212,15 @@ def complete_prefix_number_ordering(entries):
unified_indices = list(unified_indices)
unified_indices.sort()
if len(unified_indices) > 1 and min(unified_indices[i] - unified_indices[i-1] for i in range(1, len(unified_indices))) > 2:
if len(unified_indices) > 1:
for i in range(1, len(unified_indices)):
cur = unified_indices[i]
prev = unified_indices[i-1]
for level in range(min(len(cur), len(prev))):
if cur[level] != prev[level]:
if cur[level] - prev[level] > 2:
return None
break
versions = list(numberings_by_version.keys())
versions.sort()
@ -231,27 +238,37 @@ def complete_prefix_number_ordering(entries):
for out_ver in outer_versions:
for i in unified_indices:
for ver in ([out_ver] + (inner_versions if out_ver == versions[0] else [])):
entries_i_ver = numberings_by_version[ver].get(i, [])
if len(entries_i_ver) <= 1:
result += entries_i_ver
else:
return None
result += numberings_by_version[ver].get(i, [])
return result
def unique_hierarchical_prefix_numbering(entries, start_point=0):
    """Assign every entry a unique, possibly multi-level, numeric index.

    The numbers in the first entry's name are tried from last to first. The
    first one whose preceding text is a prefix shared by every entry's name
    is taken as the numbering position. Entries that share the same number
    at that position are numbered recursively using the text that follows
    it, and the resulting sub-indices are appended to the parent index.

    Args:
        entries: objects exposing a ``name`` string attribute.
        start_point: offset into the name; numbers starting before this
            offset are not considered. Recursive calls use it to skip the
            levels that have already been consumed.

    Returns:
        A dict mapping index tuples (one int per hierarchy level) to
        single-element lists of entries, or ``None`` if no consistent
        numbering exists (including when ``entries`` is empty).
    """
    # Nothing to number; also avoids an IndexError on entries[0] below.
    if not entries:
        return None

    first_name = entries[0].name
    # Try the rightmost number first.
    for m in reversed(list(NUMBER_REGEX.finditer(first_name))):
        pos = m.start()
        if pos < start_point:
            # Only numbers belonging to already-consumed levels remain.
            return None
        prefix = first_name[:pos]
        if not all(e.name.startswith(prefix) for e in entries):
            continue

        numbering = {}
        for e in entries:
            n = NUMBER_REGEX.match(e.name[pos:])
            if n is None:
                return None
            i = int(n.group())
            numbering.setdefault((i,), []).append(e)

        # Iterate over a snapshot of the keys: the loop body replaces
        # ambiguous indices with longer, disambiguated ones.
        for idx in list(numbering):
            if len(numbering[idx]) > 1:
                ents_idx = numbering.pop(idx)
                # The next level starts right after this level's number.
                next_layer_start = pos + NUMBER_REGEX.match(ents_idx[0].name[pos:]).end()
                sub_numbering = unique_hierarchical_prefix_numbering(ents_idx, start_point=next_layer_start)
                if not sub_numbering:
                    return None
                for sub_idx in sub_numbering:
                    numbering[(*idx, *sub_idx)] = sub_numbering[sub_idx]

        return numbering

    return None