prioritize matches with more consecutive characters

This commit is contained in:
xenofem 2024-03-03 03:21:53 -05:00
parent f994060149
commit 42b49c7ecc

View file

@@ -837,13 +837,13 @@ def similarity(a, b):
     if (shorter, longer) in memoized_similarities:
         return memoized_similarities[(shorter, longer)]
-    skip_similarity = similarity(shorter[1:], longer)
-    match_idx = longer.find(shorter[0])
-    if match_idx == -1:
-        result = skip_similarity
-    else:
-        take_similarity = 1 + similarity(shorter[1:], longer[match_idx+1:])
-        result = max(skip_similarity, take_similarity)
+    options = [similarity(shorter[1:], longer)]
+    for i in range(1, len(shorter)+1):
+        match_idx = longer.find(shorter[:i])
+        if match_idx == -1:
+            break
+        options.append(i*i + similarity(shorter[i:], longer[match_idx+i:]))
+    result = max(options)
     memoized_similarities[(shorter, longer)] = result
     return result