prioritize matches with more consecutive characters

This commit is contained in:
xenofem 2024-03-03 03:21:53 -05:00
parent f994060149
commit 42b49c7ecc

View file

@@ -837,13 +837,13 @@ def similarity(a, b):
if (shorter, longer) in memoized_similarities:
return memoized_similarities[(shorter, longer)]
skip_similarity = similarity(shorter[1:], longer)
match_idx = longer.find(shorter[0])
if match_idx == -1:
result = skip_similarity
else:
take_similarity = 1 + similarity(shorter[1:], longer[match_idx+1:])
result = max(skip_similarity, take_similarity)
options = [similarity(shorter[1:], longer)]
for i in range(1, len(shorter)+1):
match_idx = longer.find(shorter[:i])
if match_idx == -1:
break
options.append(i*i + similarity(shorter[i:], longer[match_idx+i:]))
result = max(options)
memoized_similarities[(shorter, longer)] = result
return result