show more suggested works, and break ties randomly instead of first-come-first-served

This commit is contained in:
xenofem 2024-03-19 15:12:49 -04:00
parent 4e75017df6
commit f66fa8138d

View file

@ -7,6 +7,7 @@ from io import BytesIO
from pathlib import Path from pathlib import Path
import os import os
from os.path import relpath, splitext from os.path import relpath, splitext
import random
import re import re
import readline import readline
import shutil import shutil
@ -91,6 +92,8 @@ MULTIPART_RAR_TAIL_REGEX = re.compile(r'^(.+)\.part0*([^1]|[^0].+)\.rar$', re.I)
PDF_REFERENCED_IMAGE_REGEX = re.compile(r'(^|(?<=\s))/(?P<ref_name>\S+)\s+Do($|(?=\s))') PDF_REFERENCED_IMAGE_REGEX = re.compile(r'(^|(?<=\s))/(?P<ref_name>\S+)\s+Do($|(?=\s))')
PDF_INLINE_IMAGE_REGEX = re.compile(r'(^|\s)(BI|ID|EI)($|\s)') PDF_INLINE_IMAGE_REGEX = re.compile(r'(^|\s)(BI|ID|EI)($|\s)')
SUGGESTED_WORKS_COUNT = 10
debug_mode = False debug_mode = False
def debug(s): def debug(s):
if debug_mode: if debug_mode:
@ -1012,18 +1015,23 @@ def similarity(a, b):
memoized_similarities[(shorter, longer)] = result memoized_similarities[(shorter, longer)] = result
return result return result
def top(items, n, key, overflow=0):
    """Return the best-scoring items, breaking ties at random.

    Every item is scored once with ``key``. The candidates are all items
    whose score is at least the n-th highest score, so items tied with the
    n-th place are kept as candidates rather than being dropped in
    first-come-first-served order.

    Args:
        items: Iterable of items to rank; it is consumed exactly once.
        n: Number of items wanted. Zero or a negative value yields ``[]``.
        key: Callable mapping an item to a comparable score (higher wins).
        overflow: How many extra candidates (beyond ``n``) may be returned.
            Extra slots can only be filled by items tied with n-th place.

    Returns:
        A list of at most ``n + overflow`` items in descending score order.
        The order among items with equal scores is random.
    """
    # Guard first: without it an empty candidate list would be indexed.
    if n <= 0:
        return []

    scored = [(item, key(item)) for item in items]

    if len(scored) > n:
        # Score of the n-th best item; anything below it can never be chosen.
        cutoff = sorted((score for _, score in scored), reverse=True)[n - 1]
        scored = [pair for pair in scored if pair[1] >= cutoff]

    # shuffle followed by stable sort to randomly shuffle within each score tier
    random.shuffle(scored)
    scored.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a negative overflow larger than n cannot slice from the end.
    limit = max(n + overflow, 0)
    return [item for item, _ in scored[:limit]]
def generate(args): def generate(args):
jenv = Environment( jenv = Environment(
@ -1080,7 +1088,7 @@ def generate(args):
if work['series'] and work['series'] == other_work['series']: if work['series'] and work['series'] == other_work['series']:
return -1 return -1
return similarity(work['title'], other_work['title']) return similarity(work['title'], other_work['title'])
suggested = top(works, 6, suggestion_priority) suggested = top(works, SUGGESTED_WORKS_COUNT, suggestion_priority)
work_dir = site_dir / 'works' / work['id'] work_dir = site_dir / 'works' / work['id']
viewer_dir = work_dir / 'view' viewer_dir = work_dir / 'view'