From ace12bcab6c87f01f5b743f9e48aec1217f0090b Mon Sep 17 00:00:00 2001 From: Steve Howell Date: Wed, 20 Nov 2013 17:25:48 -0500 Subject: [PATCH] Support arbitrarily nested fenced quote/code blocks. Now we can nest fenced code/quote blocks inside of quote blocks down to arbitrary depths. Code blocks are always leaves. Fenced blocks start with at least three tildes or backticks, and the clump of punctuation then becomes the terminator for the block. If the user ends their message without terminators, all blocks are automatically closed. When inside a quote block, you can start another fenced block with any header that doesn't match the end-string of the outer block. (If you don't want to specify a language, then you can change the number of backticks/tildes to avoid ambiguity.) Most of the heavy lifting happens in FencedBlockPreprocessor.run(). The parser works by pushing handlers onto a stack and popping them off when the ends of blocks are encountered. Parents communicate with their children by passing in a simple Python list of strings for the child to append to. Handlers also maintain their own lists for their own content, and when their done() method is called, they render their data as needed. The handlers are objects returned by functions, and the handler functions close over the variables push, pop, and processor. The closure style here makes the handlers pretty tightly coupled to the outer run() method. If we wanted to move to a class-based style, the tradeoff would be that the class instances would have to marshal push/pop/processor etc., but we could test the components more easily in isolation. Dealing with blank lines is very fiddly inside of bugdown. The new functionality here is captured in the test BugdownTest.test_complexly_nested_quote(). 
(imported from commit 53886c8de74bdf2bbd3cef8be9de25f05bddb93c) --- zerver/lib/bugdown/fenced_code.py | 168 +++++++++++++++++++++--------- zerver/tests.py | 79 ++++++++++++-- 2 files changed, 190 insertions(+), 57 deletions(-) diff --git a/zerver/lib/bugdown/fenced_code.py b/zerver/lib/bugdown/fenced_code.py index 8077f96cd9..826013b817 100644 --- a/zerver/lib/bugdown/fenced_code.py +++ b/zerver/lib/bugdown/fenced_code.py @@ -68,11 +68,25 @@ import markdown from zerver.lib.bugdown.codehilite import CodeHilite, CodeHiliteExtension # Global vars -FENCE_RE = re.compile(r'(?P^(?:~{3,}|`{3,}))[ ]*(\{?\.?(?P[a-zA-Z0-9_+-]*)\}?)$', re.MULTILINE|re.DOTALL) -FENCED_BLOCK_RE = re.compile( \ - r'(?P^(?:~{3,}|`{3,}))[ ]*(\{?\.?(?P[a-zA-Z0-9_+-]*)\}?)?[ ]*\n(?P.*?)(?<=\n)(?P=fence)[ ]*$', - re.MULTILINE|re.DOTALL +FENCE_RE = re.compile(r""" + # ~~~ or ``` + (?P + ^(?:~{3,}|`{3,}) ) + + [ ]* # spaces + + ( + \{?\.? + (?P + [a-zA-Z0-9_+-]* + ) # "py" or "javascript" + \}? + ) # language, like ".py" or "{javascript}" + $ + """, re.VERBOSE) + + CODE_WRAP = '
%s
' LANG_TAG = ' class="%s"' @@ -101,6 +115,109 @@ class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor): self.checked_for_codehilite = False self.codehilite_conf = {} + def run(self, lines): + """ Match and store Fenced Code Blocks in the HtmlStash. """ + + output = [] + + class Record: + pass + + processor = self + handlers = [] + + def push(handler): + handlers.append(handler) + + def pop(): + handlers.pop() + + class OuterHandler: + def __init__(self, output): + self.output = output + + def handle_line(self, line): + check_for_new_fence(self.output, line) + + def done(self): + pop() + + def check_for_new_fence(output, line): + m = FENCE_RE.match(line) + if m: + fence = m.group('fence') + lang = m.group('lang') + handler = generic_handler(output, fence, lang) + push(handler) + else: + output.append(line) + + def generic_handler(output, fence, lang): + if lang in ('quote', 'quoted'): + return QuoteHandler(output, fence) + else: + return CodeHandler(output, fence, lang) + + class QuoteHandler: + def __init__(self, output, fence): + self.output = output + self.fence = fence + self.lines = [] + + def handle_line(self, line): + if line.rstrip() == self.fence: + self.done() + else: + check_for_new_fence(self.lines, line) + + def done(self): + text = '\n'.join(self.lines) + text = processor.format_quote(text) + processed_lines = text.split('\n') + self.output.append('') + self.output.extend(processed_lines) + self.output.append('') + pop() + + class CodeHandler: + def __init__(self, output, fence, lang): + self.output = output + self.fence = fence + self.lang = lang + self.lines = [] + + def handle_line(self, line): + if line.rstrip() == self.fence: + self.done() + else: + self.lines.append(line) + + def done(self): + text = '\n'.join(self.lines) + text = processor.format_code(self.lang, text) + text = processor.placeholder(text) + processed_lines = text.split('\n') + self.output.append('') + self.output.extend(processed_lines) + self.output.append('') + 
pop() + + handler = OuterHandler(output) + push(handler) + + for line in lines: + handlers[-1].handle_line(line) + + while handlers: + handlers[-1].done() + + # This fiddly handling of new lines at the end of our output was done to make + # existing tests pass. Bugdown is just kind of funny when it comes to new lines, + # but we could probably remove this hack. + if len(output) > 2 and output[-2] != '': + output.append('') + return output + def format_code(self, lang, text): langclass = '' if lang: @@ -143,49 +260,6 @@ class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor): def placeholder(self, code): return self.markdown.htmlStash.store(code, safe=True) - def format_fence(self, lang, text): - if lang in ('quote', 'quoted'): - replacement = self.format_quote(text) - return replacement - else: - code = self.format_code(lang, text) - return self.placeholder(code) - - def process_fence(self, m, text): - lang = m.group('lang') - code = m.group('code') - fence_text = self.format_fence(lang, code) - before_text = text[:m.start()] - end_text = text[m.end():] - return '%s\n%s\n%s'% (before_text, fence_text, end_text) - - def run(self, lines): - """ Match and store Fenced Code Blocks in the HtmlStash. 
""" - - text = "\n".join(lines) - while 1: - m = FENCED_BLOCK_RE.search(text) - if m: - text = self.process_fence(m, text) - else: - break - - - fence = FENCE_RE.search(text) - if fence: - # If we found a starting fence but no ending fence, - # then we add a closing fence before the two newlines that - # markdown automatically inserts - if text[-2:] == '\n\n': - text = text[:-2] + '\n' + fence.group('fence') + text[-2:] - else: - text += fence.group('fence') - m = FENCED_BLOCK_RE.search(text) - if m: - text = self.process_fence(m, text) - - return text.split("\n") - def _escape(self, txt): """ basic html escaping """ txt = txt.replace('&', '&') diff --git a/zerver/tests.py b/zerver/tests.py index 8393cffcda..8349d019e5 100644 --- a/zerver/tests.py +++ b/zerver/tests.py @@ -2846,7 +2846,7 @@ class FencedBlockPreprocessorTest(TestCase): # Simulate code formatting. processor.format_code = lambda lang, code: lang + ':' + code - processor.placeholder = lambda s: '(' + s + ')' + processor.placeholder = lambda s: '**' + s.strip('\n') + '**' markdown = [ '``` .py', @@ -2861,13 +2861,11 @@ class FencedBlockPreprocessorTest(TestCase): ] expected = [ '', - '(py:hello()', - ')', + '**py:hello()**', '', '', '', - '(py:goodbye()', - ')', + '**py:goodbye()**', '', '', '' @@ -2880,7 +2878,7 @@ class FencedBlockPreprocessorTest(TestCase): # Simulate code formatting. processor.format_code = lambda lang, code: lang + ':' + code - processor.placeholder = lambda s: '(' + s + ')' + processor.placeholder = lambda s: '**' + s.strip('\n') + '**' markdown = [ '~~~ quote', @@ -2895,10 +2893,7 @@ class FencedBlockPreprocessorTest(TestCase): '', '> hi', '', - '> (py:hello()', - '> )', - '', - '', + '> **py:hello()**', '', '', '' @@ -3047,6 +3042,70 @@ Thou canst not then be false to any man.

self.common_bugdown_test(fenced_quote, expected_convert) + def test_complexly_nested_quote(self): + fenced_quote = \ +"""I heard about this second hand... + +~~~ quote + +He said: +~~~ quote +The customer is complaining. + +They looked at this code: +``` .py +def hello(): print 'hello +``` +They would prefer: +~~~ .rb +def hello() + puts 'hello' +end +~~~ + +Please advise. +~~~ + +She said: +~~~ quote +Just send them this: +``` .sh +echo "hello\n" +``` +~~~""" + expected = \ +"""

I heard about this second hand...

+
+

He said:

+
+

The customer is complaining.

+

They looked at this code:

+
def hello(): print 'hello
+
+ + +

They would prefer:

+
def hello()
+  puts 'hello'
+end
+
+ + +

Please advise.

+
+

She said:

+
+

Just send them this:

+
echo "hello
+"
+
+ + +
+
""" + + self.common_bugdown_test(fenced_quote, expected) + def test_dangerous_block(self): fenced_code = u'xxxxxx xxxxx xxxxxxxx xxxx. x xxxx xxxxxxxxxx:\n\n```\ "xxxx xxxx\\xxxxx\\xxxxxx"```\n\nxxx xxxx xxxxx:```xx.xxxxxxx(x\'^xxxx$\'\