zephyr_mirror: Improve different-paragraph detection algorithm.
(imported from commit 789651dd5b9af58bbc0e59a64dd565f06585134d)
This commit is contained in:
		
							parent
							
								
									403b728d3c
								
							
						
					
					
						commit
						77bb41b3dd
					
				
					 1 changed files with 12 additions and 3 deletions
				
			
		| 
						 | 
				
			
			@ -96,9 +96,18 @@ def to_zephyr_username(humbug_username):
 | 
			
		|||
        raise Exception("Could not parse Zephyr realm for cross-realm user %s" % (humbug_username,))
 | 
			
		||||
    return match_user.group(1).lower() + "@" + match_user.group(2).upper()
 | 
			
		||||
 | 
			
		||||
def early_indent(line, next_line):
 | 
			
		||||
# Checks whether the pair of adjacent lines would have been
 | 
			
		||||
# linewrapped together, had they been intended to be parts of the same
 | 
			
		||||
# paragraph.  Our check is whether, if you move the first word on the
 | 
			
		||||
# 2nd line onto the first line, the resulting line is either (1)
 | 
			
		||||
# significantly shorter than the following line (which, if they were
 | 
			
		||||
# in the same paragraph, should have been wrapped in a way consistent
 | 
			
		||||
# with how the previous line was wrapped) or (2) shorter than 60
 | 
			
		||||
# characters (our assumed minimum linewrapping threshold for Zephyr)
 | 
			
		||||
def different_paragraph(line, next_line):
 | 
			
		||||
    words = next_line.split()
 | 
			
		||||
    return len(line + " " + words[0]) < len(next_line) * 0.8
 | 
			
		||||
    return (len(line + " " + words[0]) < len(next_line) * 0.8 or
 | 
			
		||||
            len(line + " " + words[0]) < 60)
 | 
			
		||||
 | 
			
		||||
# Linewrapping algorithm based on:
 | 
			
		||||
# http://gcbenison.wordpress.com/2011/07/03/a-program-to-intelligently-remove-carriage-returns-so-you-can-paste-text-without-having-it-look-awful/
 | 
			
		||||
| 
						 | 
				
			
			@ -111,7 +120,7 @@ def unwrap_lines(body):
 | 
			
		|||
        if (line == "" or
 | 
			
		||||
            previous_line == "" or
 | 
			
		||||
            not re.match(r'^[\w]', line, flags=re.UNICODE) or
 | 
			
		||||
            early_indent(previous_line, line)):
 | 
			
		||||
            different_paragraph(previous_line, line)):
 | 
			
		||||
            # Use 2 newlines to separate sections so that we
 | 
			
		||||
            # trigger proper Markdown processing on things like
 | 
			
		||||
            # bulleted lists
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue