Prioritize matches with more consecutive characters: a run of i consecutive matched characters now scores i*i instead of 1 per character
This commit is contained in:
parent
f994060149
commit
42b49c7ecc
|
@ -837,13 +837,13 @@ def similarity(a, b):
|
|||
if (shorter, longer) in memoized_similarities:
|
||||
return memoized_similarities[(shorter, longer)]
|
||||
|
||||
skip_similarity = similarity(shorter[1:], longer)
|
||||
match_idx = longer.find(shorter[0])
|
||||
if match_idx == -1:
|
||||
result = skip_similarity
|
||||
else:
|
||||
take_similarity = 1 + similarity(shorter[1:], longer[match_idx+1:])
|
||||
result = max(skip_similarity, take_similarity)
|
||||
options = [similarity(shorter[1:], longer)]
|
||||
for i in range(1, len(shorter)+1):
|
||||
match_idx = longer.find(shorter[:i])
|
||||
if match_idx == -1:
|
||||
break
|
||||
options.append(i*i + similarity(shorter[i:], longer[match_idx+i:]))
|
||||
result = max(options)
|
||||
|
||||
memoized_similarities[(shorter, longer)] = result
|
||||
return result
|
||||
|
|
Loading…
Reference in a new issue