From 77bb41b3dda7e374a22dc6584c1d3c3e80bfe443 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Mon, 5 Nov 2012 15:39:09 -0500 Subject: [PATCH] zephyr_mirror: Improve different-paragraph detection algorithm. (imported from commit 789651dd5b9af58bbc0e59a64dd565f06585134d) --- bots/zephyr_mirror.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/bots/zephyr_mirror.py b/bots/zephyr_mirror.py index 5642c60..6c4fe3e 100755 --- a/bots/zephyr_mirror.py +++ b/bots/zephyr_mirror.py @@ -96,9 +96,18 @@ def to_zephyr_username(humbug_username): raise Exception("Could not parse Zephyr realm for cross-realm user %s" % (humbug_username,)) return match_user.group(1).lower() + "@" + match_user.group(2).upper() -def early_indent(line, next_line): +# Checks whether the pair of adjacent lines would have been +# linewrapped together, had they been intended to be parts of the same +# paragraph. Our check is whether if you move the first word on the +# 2nd line onto the first line, the resulting line is either (1) +# significantly shorter than the following line (which, if they were +# in the same paragraph, should have been wrapped in a way consistent +# with how the previous line was wrapped) or (2) shorter than 60 +# characters (our assumed minimum linewrapping threshold for Zephyr) +def different_paragraph(line, next_line): words = next_line.split() - return len(line + " " + words[0]) < len(next_line) * 0.8 + return (len(line + " " + words[0]) < len(next_line) * 0.8 or + len(line + " " + words[0]) < 60) # Linewrapping algorithm based on: # http://gcbenison.wordpress.com/2011/07/03/a-program-to-intelligently-remove-carriage-returns-so-you-can-paste-text-without-having-it-look-awful/ @@ -111,7 +120,7 @@ def unwrap_lines(body): if (line == "" or previous_line == "" or not re.match(r'^[\w]', line, flags=re.UNICODE) or - early_indent(previous_line, line)): + different_paragraph(previous_line, line)): # Use 2 newlines to separate sections so that we # 
trigger proper Markdown processing on things like # bulleted lists