prioritize matches with more consecutive characters
This commit is contained in:
parent
f994060149
commit
42b49c7ecc
|
@@ -837,13 +837,13 @@ def similarity(a, b):
|
||||||
if (shorter, longer) in memoized_similarities:
|
if (shorter, longer) in memoized_similarities:
|
||||||
return memoized_similarities[(shorter, longer)]
|
return memoized_similarities[(shorter, longer)]
|
||||||
|
|
||||||
skip_similarity = similarity(shorter[1:], longer)
|
options = [similarity(shorter[1:], longer)]
|
||||||
match_idx = longer.find(shorter[0])
|
for i in range(1, len(shorter)+1):
|
||||||
if match_idx == -1:
|
match_idx = longer.find(shorter[:i])
|
||||||
result = skip_similarity
|
if match_idx == -1:
|
||||||
else:
|
break
|
||||||
take_similarity = 1 + similarity(shorter[1:], longer[match_idx+1:])
|
options.append(i*i + similarity(shorter[i:], longer[match_idx+i:]))
|
||||||
result = max(skip_similarity, take_similarity)
|
result = max(options)
|
||||||
|
|
||||||
memoized_similarities[(shorter, longer)] = result
|
memoized_similarities[(shorter, longer)] = result
|
||||||
return result
|
return result
|
||||||
|
|
Loading…
Reference in a new issue