prioritize matches with more consecutive characters

2024-03-03 03:21:53 -05:00 · 2024-03-03 03:21:53 -05:00 · 42b49c7ecc
commit 42b49c7ecc
parent f994060149
1 changed file with 7 additions and 7 deletions
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -837,13 +837,13 @@ def similarity(a, b):
    if (shorter, longer) in memoized_similarities:
        return memoized_similarities[(shorter, longer)]

-    skip_similarity = similarity(shorter[1:], longer)
-    match_idx = longer.find(shorter[0])
+    options = [similarity(shorter[1:], longer)]
+    for i in range(1, len(shorter)+1):
+        match_idx = longer.find(shorter[:i])
-    if match_idx == -1:
+        if match_idx == -1:
-        result = skip_similarity
-    else:
-        take_similarity = 1 + similarity(shorter[1:], longer[match_idx+1:])
-        result = max(skip_similarity, take_similarity)
+            break
+        options.append(i*i + similarity(shorter[i:], longer[match_idx+i:]))
+    result = max(options)

    memoized_similarities[(shorter, longer)] = result
    return result