From f66fa8138d543cc3621707ddd4e1d2fac76ce8cd Mon Sep 17 00:00:00 2001
From: xenofem
Date: Tue, 19 Mar 2024 15:12:49 -0400
Subject: [PATCH] show more suggested works, and break ties randomly instead of first-come-first-served

---
 dlibrary/dlibrary.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/dlibrary/dlibrary.py b/dlibrary/dlibrary.py
index 096a623..88823e7 100755
--- a/dlibrary/dlibrary.py
+++ b/dlibrary/dlibrary.py
@@ -7,6 +7,7 @@ from io import BytesIO
 from pathlib import Path
 import os
 from os.path import relpath, splitext
+import random
 import re
 import readline
 import shutil
@@ -91,6 +92,8 @@ MULTIPART_RAR_TAIL_REGEX = re.compile(r'^(.+)\.part0*([^1]|[^0].+)\.rar$', re.I)
 PDF_REFERENCED_IMAGE_REGEX = re.compile(r'(^|(?<=\s))/(?P\S+)\s+Do($|(?=\s))')
 PDF_INLINE_IMAGE_REGEX = re.compile(r'(^|\s)(BI|ID|EI)($|\s)')
 
+SUGGESTED_WORKS_COUNT = 10
+
 debug_mode = False
 def debug(s):
     if debug_mode:
@@ -1012,18 +1015,23 @@ def similarity(a, b):
     memoized_similarities[(shorter, longer)] = result
     return result
 
-def top(items, n, key):
+def top(items, n, key, overflow=0):
     winners = []
     for item in items:
         score = key(item)
-        if len(winners) < n or score > winners[-1][1]:
+        if len(winners) < n or score >= winners[-1][1]:
             for i in range(len(winners) + 1):
-                if i == len(winners) or score > winners[i][1]:
+                if i == len(winners) or score >= winners[i][1]:
                     winners.insert(i, (item, score))
                     break
-            while len(winners) > n:
+            while len(winners) > n and winners[-1][1] < winners[n-1][1]:
                 winners.pop()
-    return [item for (item, score) in winners]
+
+    # shuffle followed by stable sort to randomly shuffle within each score tier
+    random.shuffle(winners)
+    winners.sort(key=lambda w: w[1], reverse=True)
+
+    return [item for (item, score) in winners[:n+overflow]]
 
 def generate(args):
     jenv = Environment(
@@ -1080,7 +1088,7 @@ def generate(args):
             if work['series'] and work['series'] == other_work['series']:
                 return -1
             return similarity(work['title'], other_work['title'])
-        suggested = top(works, 6, suggestion_priority)
+        suggested = top(works, SUGGESTED_WORKS_COUNT, suggestion_priority)
 
         work_dir = site_dir / 'works' / work['id']
         viewer_dir = work_dir / 'view'