From 42b49c7ecc0b22b0f35d61250900bc0aef1c0c46 Mon Sep 17 00:00:00 2001 From: xenofem Date: Sun, 3 Mar 2024 03:21:53 -0500 Subject: [PATCH] prioritize matches with more consecutive characters --- dlibrary/dlibrary.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py index 03f7f4e..97d657c 100755 --- a/dlibrary/dlibrary.py +++ b/dlibrary/dlibrary.py @@ -837,13 +837,13 @@ def similarity(a, b): if (shorter, longer) in memoized_similarities: return memoized_similarities[(shorter, longer)] - skip_similarity = similarity(shorter[1:], longer) - match_idx = longer.find(shorter[0]) - if match_idx == -1: - result = skip_similarity - else: - take_similarity = 1 + similarity(shorter[1:], longer[match_idx+1:]) - result = max(skip_similarity, take_similarity) + options = [similarity(shorter[1:], longer)] + for i in range(1, len(shorter)+1): + match_idx = longer.find(shorter[:i]) + if match_idx == -1: + break + options.append(i*i + similarity(shorter[i:], longer[match_idx+i:])) + result = max(options) memoized_similarities[(shorter, longer)] = result return result