zulip/zerver/lib/bugdown/fenced_code.py

#!/usr/bin/env python

"""
Fenced Code Extension for Python Markdown
=========================================

This extension adds Fenced Code Blocks to Python-Markdown.

    >>> import markdown
    >>> text = '''
    ... A paragraph before a fenced code block:
    ...
    ... ~~~
    ... Fenced code block
    ... ~~~
    ... '''
    >>> html = markdown.markdown(text, extensions=['fenced_code'])
    >>> print html
    <p>A paragraph before a fenced code block:</p>
    <pre><code>Fenced code block
    </code></pre>

Works with safe_mode also (we check this because we are using the HtmlStash):

    >>> print markdown.markdown(text, extensions=['fenced_code'], safe_mode='replace')
    <p>A paragraph before a fenced code block:</p>
    <pre><code>Fenced code block
    </code></pre>

Include tildes in a code block and wrap with blank lines:

    >>> text = '''
    ... ~~~~~~~~
    ...
    ... ~~~~
    ... ~~~~~~~~'''
    >>> print markdown.markdown(text, extensions=['fenced_code'])
    <pre><code>
    ~~~~
    </code></pre>

Language tags:

    >>> text = '''
    ... ~~~~{.python}
    ... # Some python code
    ... ~~~~'''
    >>> print markdown.markdown(text, extensions=['fenced_code'])
    <pre><code class="python"># Some python code
    </code></pre>

Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).

Project website: <http://packages.python.org/Markdown/extensions/fenced_code_blocks.html>
Contact: markdown@freewisdom.org

License: BSD (see ../docs/LICENSE for details)

Dependencies:
* [Python 2.4+](http://python.org)
* [Markdown 2.0+](http://packages.python.org/Markdown/)
* [Pygments (optional)](http://pygments.org)

"""

import re
import markdown
from zerver.lib.bugdown.codehilite import CodeHilite, CodeHiliteExtension

# Global vars

# Matches the opening line of a fenced block: a run of three or more tildes
# or backticks at the start of the line (captured as "fence"), optional
# spaces, then an optional language tag (captured as "lang") that may be
# written bare, with a leading dot, and/or wrapped in braces.  The "lang"
# group is the empty string when no language is given.
FENCE_RE = re.compile(r"""
    # ~~~ or ```
    (?P<fence>
        ^(?:~{3,}|`{3,})
    )

    [ ]* # spaces

    (
        \{?\.?
        (?P<lang>
            [a-zA-Z0-9_+-]*
        ) # "py" or "javascript"
        \}?
    ) # language, like ".py" or "{javascript}"
    $
    """, re.VERBOSE)


# HTML template for a non-highlighted code block; filled with
# (class attribute or '', escaped code text).
CODE_WRAP = '<pre><code%s>%s</code></pre>'
# Template for the class attribute inserted into CODE_WRAP when a language
# tag was given on the fence line.
LANG_TAG = ' class="%s"'

class FencedCodeExtension(markdown.Extension):
    """ Markdown extension that installs the fenced block preprocessor. """

    def extendMarkdown(self, md, md_globals):
        """ Register this extension and hook FencedBlockPreprocessor into md.

        The preprocessor is added under the name 'fenced_code_block'.
        md_globals is accepted for interface compatibility and is not used.
        """
        md.registerExtension(self)

        # Python-Markdown releases that ship a normalize_whitespace
        # preprocessor (2.3 and later, apparently) need it to run before us;
        # otherwise we go at the very beginning of the chain.
        if 'normalize_whitespace' in md.preprocessors:
            where = '>normalize_whitespace'
        else:
            where = '_begin'

        fence_preprocessor = FencedBlockPreprocessor(md)
        md.preprocessors.add('fenced_code_block', fence_preprocessor, where)


class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
    """ Preprocessor that rewrites fenced code and quote blocks.

    Fenced code blocks are rendered to HTML and replaced by whatever
    htmlStash.store() returns for them; fenced quote blocks (language
    'quote' or 'quoted') are rewritten as "> "-prefixed lines and may
    themselves contain further fenced blocks.
    """

    def __init__(self, md):
        markdown.preprocessors.Preprocessor.__init__(self, md)

        # Both are filled in lazily by format_code() on its first call.
        self.checked_for_codehilite = False
        self.codehilite_conf = {}

    def run(self, lines):
        """ Match and store Fenced Code Blocks in the HtmlStash.

        lines is the list of input lines; the return value is a new list of
        lines with every fenced block replaced by its rendered form.

        Parsing uses a stack of handlers.  The handler on top of the stack
        receives each input line.  A line matching FENCE_RE pushes a new
        handler (where the current handler allows nesting); a line equal to
        a handler's opening fence (ignoring trailing whitespace) finishes
        that handler, which renders its collected lines into its parent's
        list and pops itself.  Blocks still open at the end of the input
        are closed automatically, innermost first.
        """

        output = []

        # The nested handlers below close over processor, push and pop.
        processor = self
        handlers = []

        def push(handler):
            handlers.append(handler)

        def pop():
            handlers.pop()

        class OuterHandler:
            """ Bottom-of-stack handler for lines outside any fenced block. """

            def __init__(self, output):
                self.output = output

            def handle_line(self, line):
                check_for_new_fence(self.output, line)

            def done(self):
                pop()

        def check_for_new_fence(output, line):
            # Either start a nested block that will render into `output`,
            # or pass the line through to `output` unchanged.
            m = FENCE_RE.match(line)
            if m:
                fence = m.group('fence')
                lang = m.group('lang')
                handler = generic_handler(output, fence, lang)
                push(handler)
            else:
                output.append(line)

        def generic_handler(output, fence, lang):
            # 'quote'/'quoted' select a quote block; any other language tag
            # (including none at all) selects a code block.
            if lang in ('quote', 'quoted'):
                return QuoteHandler(output, fence)
            else:
                return CodeHandler(output, fence, lang)

        class QuoteHandler:
            """ Collects the body of a fenced quote; nested fences allowed. """

            def __init__(self, output, fence):
                self.output = output
                self.fence = fence
                self.lines = []

            def handle_line(self, line):
                if line.rstrip() == self.fence:
                    self.done()
                else:
                    # Nested blocks render into this quote's own line list.
                    check_for_new_fence(self.lines, line)

            def done(self):
                text = '\n'.join(self.lines)
                text = processor.format_quote(text)
                processed_lines = text.split('\n')
                # Surround the rendered block with blank lines.
                self.output.append('')
                self.output.extend(processed_lines)
                self.output.append('')
                pop()

        class CodeHandler:
            """ Collects the body of a fenced code block verbatim. """

            def __init__(self, output, fence, lang):
                self.output = output
                self.fence = fence
                self.lang = lang
                self.lines = []

            def handle_line(self, line):
                if line.rstrip() == self.fence:
                    self.done()
                else:
                    # Code blocks are leaves: fence-like lines that do not
                    # close this block are kept as literal content.
                    self.lines.append(line)

            def done(self):
                text = '\n'.join(self.lines)
                text = processor.format_code(self.lang, text)
                # Swap the rendered HTML for the value returned by the stash.
                text = processor.placeholder(text)
                processed_lines = text.split('\n')
                # Surround the rendered block with blank lines.
                self.output.append('')
                self.output.extend(processed_lines)
                self.output.append('')
                pop()

        handler = OuterHandler(output)
        push(handler)

        for line in lines:
            handlers[-1].handle_line(line)

        # Close anything still open; every done() pops exactly one handler,
        # so this loop terminates.
        while handlers:
            handlers[-1].done()

        # This fiddly handling of new lines at the end of our output was done to make
        # existing tests pass.  Bugdown is just kind of funny when it comes to new lines,
        # but we could probably remove this hack.
        if len(output) > 2 and output[-2] != '':
            output.append('')
        return output

    def format_code(self, lang, text):
        """ Return the HTML for a code block.

        lang is the language tag from the fence line ('' when absent) and
        text is the raw block body.  When a CodeHiliteExtension is
        registered on the Markdown instance the text is highlighted through
        CodeHilite; otherwise it is HTML-escaped and wrapped in CODE_WRAP.
        """
        langclass = ''
        if lang:
            langclass = LANG_TAG % (lang,)

        # Check for code hilite extension (only once per preprocessor;
        # the result is cached in self.codehilite_conf).
        if not self.checked_for_codehilite:
            for ext in self.markdown.registeredExtensions:
                if isinstance(ext, CodeHiliteExtension):
                    self.codehilite_conf = ext.config
                    break

            self.checked_for_codehilite = True

        # If config is not empty, then the codehilite extension
        # is enabled, so we call it to highlight the code.
        # NOTE(review): each config entry is read at index [0]; presumably
        # entries are [value, description] pairs -- confirm against the
        # CodeHiliteExtension in use.
        if self.codehilite_conf:
            highliter = CodeHilite(text,
                    force_linenos=self.codehilite_conf['force_linenos'][0],
                    guess_lang=self.codehilite_conf['guess_lang'][0],
                    css_class=self.codehilite_conf['css_class'][0],
                    style=self.codehilite_conf['pygments_style'][0],
                    lang=(lang or None),
                    noclasses=self.codehilite_conf['noclasses'][0])

            code = highliter.hilite()
        else:
            code = CODE_WRAP % (langclass, self._escape(text))

        return code

    def format_quote(self, text):
        """ Return text with every non-empty line prefixed by "> ".

        Paragraphs are the pieces of text separated by a blank line; they
        are quoted independently and re-joined with a blank line between
        them.  Empty lines inside a paragraph are dropped.
        """
        paragraphs = text.split("\n\n")
        quoted_paragraphs = []
        for paragraph in paragraphs:
            lines = paragraph.split("\n")
            quoted_paragraphs.append("\n".join("> " + line for line in lines if line != ''))
        return "\n\n".join(quoted_paragraphs)

    def placeholder(self, code):
        """ Store code in the Markdown HtmlStash and return the stash's result. """
        return self.markdown.htmlStash.store(code, safe=True)

    def _escape(self, txt):
        """ basic html escaping """
        # '&' must be replaced first so the entities added below are not
        # themselves re-escaped.
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        txt = txt.replace('"', '&quot;')
        return txt


def makeExtension(configs=None):
    """ Build and return a FencedCodeExtension, forwarding configs to it. """
    extension = FencedCodeExtension(configs=configs)
    return extension


if __name__ == "__main__":
    # When executed as a script, run the doctests of this module.
    from doctest import testmod
    testmod()
Import GitHub version of Markdown fenced_code extension (imported from commit 929de2ba09a5c6dabed20c7f1b3ba319ba42b244) 2012-11-19 17:55:28 +01:00			`#!/usr/bin/env python`

			`"""`
			`Fenced Code Extension for Python Markdown`
			`=========================================`

			`This extension adds Fenced Code Blocks to Python-Markdown.`

			`>>> import markdown`
			`>>> text = '''`
			`... A paragraph before a fenced code block:`
			`...`
			`... ~~~`
			`... Fenced code block`
			`... ~~~`
			`... '''`
			`>>> html = markdown.markdown(text, extensions=['fenced_code'])`
			`>>> print html`
			`<p>A paragraph before a fenced code block:</p>`
			`<pre><code>Fenced code block`
			`</code></pre>`

			`Works with safe_mode also (we check this because we are using the HtmlStash):`

			`>>> print markdown.markdown(text, extensions=['fenced_code'], safe_mode='replace')`
			`<p>A paragraph before a fenced code block:</p>`
			`<pre><code>Fenced code block`
			`</code></pre>`

			`Include tilde's in a code block and wrap with blank lines:`

			`>>> text = '''`
			`... ~~~~~~~~`
			`...`
			`... ~~~~`
			`... ~~~~~~~~'''`
			`>>> print markdown.markdown(text, extensions=['fenced_code'])`
			`<pre><code>`
			`~~~~`
			`</code></pre>`

			`Language tags:`

			`>>> text = '''`
			`... ~~~~{.python}`
			`... # Some python code`
			`... ~~~~'''`
			`>>> print markdown.markdown(text, extensions=['fenced_code'])`
			`<pre><code class="python"># Some python code`
			`</code></pre>`

			`Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).`

			`Project website: <http://packages.python.org/Markdown/extensions/fenced_code_blocks.html>`
			`Contact: markdown@freewisdom.org`

			`License: BSD (see ../docs/LICENSE for details)`

			`Dependencies:`
			`* [Python 2.4+](http://python.org)`
			`* [Markdown 2.0+](http://packages.python.org/Markdown/)`
			`* [Pygments (optional)](http://pygments.org)`

			`"""`

			`import re`
			`import markdown`
[manual] Rename Django app from zephyr to zerver. This needs to be deployed to both staging and prod at the same off-peak time (and the schema migration run). At the time it is deployed, we need to make a few changes directly in the database: (1) UPDATE django_content_type set app_label='zerver' where app_label='zephyr'; (2) UPDATE south_migrationhistory set app_name='zerver' where app_name='zephyr'; (imported from commit eb3fd719571740189514ef0b884738cb30df1320) 2013-07-29 23:03:31 +02:00			`from zerver.lib.bugdown.codehilite import CodeHilite, CodeHiliteExtension`
Import GitHub version of Markdown fenced_code extension (imported from commit 929de2ba09a5c6dabed20c7f1b3ba319ba42b244) 2012-11-19 17:55:28 +01:00
			`# Global vars`
Support arbitrarily nested fenced quote/code blocks. Now we can nest fenced code/quote blocks inside of quote blocks down to arbitrary depths. Code blocks are always leafs. Fenced blocks start with at least three tildes or backticks, and the clump of punctuation then becomes the terminator for the block. If the user ends their message without terminators, all blocks are automatically closed. When inside a quote block, you can start another fenced block with any header that doesn't match the end-string of the outer block. (If you don't want to specify a language, then you can change the number of backticks/tildes to avoid amiguity.) Most of the heavy lifting happens in FencedBlockPreprocessor.run(). The parser works by pushing handlers on to a stack and popping them off when the ends of blocks are encountered. Parents communicate with their children by passing in a simple Python list of strings for the child to append to. Handlers also maintain their own lists for their own content, and when their done() method is called, they render their data as needed. The handlers are objects returned by functions, and the handler functions close on variables push, pop, and processor. The closure style here makes the handlers pretty tightly coupled to the outer run() method. If we wanted to move to a class-based style, the tradeoff would be that the class instances would have to marshall push/pop/processor etc., but we could test the components more easily in isolation. Dealing with blank lines is very fiddly inside of bugdown. The new functionality here is captured in the test BugdownTest.test_complexly_nested_quote(). (imported from commit 53886c8de74bdf2bbd3cef8be9de25f05bddb93c) 2013-11-20 23:25:48 +01:00			`FENCE_RE = re.compile(r"""`
			# ~~~ or ```
			`(?P<fence>`
			^(?:~{3,}|`{3,})
Import GitHub version of Markdown fenced_code extension (imported from commit 929de2ba09a5c6dabed20c7f1b3ba319ba42b244) 2012-11-19 17:55:28 +01:00			`)`
Support arbitrarily nested fenced quote/code blocks. Now we can nest fenced code/quote blocks inside of quote blocks down to arbitrary depths. Code blocks are always leafs. Fenced blocks start with at least three tildes or backticks, and the clump of punctuation then becomes the terminator for the block. If the user ends their message without terminators, all blocks are automatically closed. When inside a quote block, you can start another fenced block with any header that doesn't match the end-string of the outer block. (If you don't want to specify a language, then you can change the number of backticks/tildes to avoid amiguity.) Most of the heavy lifting happens in FencedBlockPreprocessor.run(). The parser works by pushing handlers on to a stack and popping them off when the ends of blocks are encountered. Parents communicate with their children by passing in a simple Python list of strings for the child to append to. Handlers also maintain their own lists for their own content, and when their done() method is called, they render their data as needed. The handlers are objects returned by functions, and the handler functions close on variables push, pop, and processor. The closure style here makes the handlers pretty tightly coupled to the outer run() method. If we wanted to move to a class-based style, the tradeoff would be that the class instances would have to marshall push/pop/processor etc., but we could test the components more easily in isolation. Dealing with blank lines is very fiddly inside of bugdown. The new functionality here is captured in the test BugdownTest.test_complexly_nested_quote(). (imported from commit 53886c8de74bdf2bbd3cef8be9de25f05bddb93c) 2013-11-20 23:25:48 +01:00
			`[ ]* # spaces`

			`(`
			`\{?\.?`
			`(?P<lang>`
			`[a-zA-Z0-9_+-]*`
			`) # "py" or "javascript"`
			`\}?`
			`) # language, like ".py" or "{javascript}"`
			`$`
			`""", re.VERBOSE)`


Import GitHub version of Markdown fenced_code extension (imported from commit 929de2ba09a5c6dabed20c7f1b3ba319ba42b244) 2012-11-19 17:55:28 +01:00			`CODE_WRAP = '<pre><code%s>%s</code></pre>'`
			`LANG_TAG = ' class="%s"'`

			`class FencedCodeExtension(markdown.Extension):`

			`def extendMarkdown(self, md, md_globals):`
			`""" Add FencedBlockPreprocessor to the Markdown instance. """`
			`md.registerExtension(self)`

bugdown: Fix fenced_code for Python-Markdown 2.3 (imported from commit 3954444708e222217407df228f07d2cad402a02b) 2013-04-04 23:14:33 +02:00			`# Newer versions of Python-Markdown (starting at 2.3?) have`
			`# a normalize_whitespace preprocessor that needs to go first.`
			`position = ('>normalize_whitespace'`
			`if 'normalize_whitespace' in md.preprocessors`
			`else '_begin')`

Import GitHub version of Markdown fenced_code extension (imported from commit 929de2ba09a5c6dabed20c7f1b3ba319ba42b244) 2012-11-19 17:55:28 +01:00			`md.preprocessors.add('fenced_code_block',`
			`FencedBlockPreprocessor(md),`
bugdown: Fix fenced_code for Python-Markdown 2.3 (imported from commit 3954444708e222217407df228f07d2cad402a02b) 2013-04-04 23:14:33 +02:00			`position)`
Import GitHub version of Markdown fenced_code extension (imported from commit 929de2ba09a5c6dabed20c7f1b3ba319ba42b244) 2012-11-19 17:55:28 +01:00

			`class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):`

			`def __init__(self, md):`
			`markdown.preprocessors.Preprocessor.__init__(self, md)`

			`self.checked_for_codehilite = False`
			`self.codehilite_conf = {}`

Support arbitrarily nested fenced quote/code blocks. Now we can nest fenced code/quote blocks inside of quote blocks down to arbitrary depths. Code blocks are always leafs. Fenced blocks start with at least three tildes or backticks, and the clump of punctuation then becomes the terminator for the block. If the user ends their message without terminators, all blocks are automatically closed. When inside a quote block, you can start another fenced block with any header that doesn't match the end-string of the outer block. (If you don't want to specify a language, then you can change the number of backticks/tildes to avoid amiguity.) Most of the heavy lifting happens in FencedBlockPreprocessor.run(). The parser works by pushing handlers on to a stack and popping them off when the ends of blocks are encountered. Parents communicate with their children by passing in a simple Python list of strings for the child to append to. Handlers also maintain their own lists for their own content, and when their done() method is called, they render their data as needed. The handlers are objects returned by functions, and the handler functions close on variables push, pop, and processor. The closure style here makes the handlers pretty tightly coupled to the outer run() method. If we wanted to move to a class-based style, the tradeoff would be that the class instances would have to marshall push/pop/processor etc., but we could test the components more easily in isolation. Dealing with blank lines is very fiddly inside of bugdown. The new functionality here is captured in the test BugdownTest.test_complexly_nested_quote(). (imported from commit 53886c8de74bdf2bbd3cef8be9de25f05bddb93c) 2013-11-20 23:25:48 +01:00			`def run(self, lines):`
			`""" Match and store Fenced Code Blocks in the HtmlStash. """`

			`output = []`

			`class Record:`
			`pass`

			`processor = self`
			`handlers = []`

			`def push(handler):`
			`handlers.append(handler)`

			`def pop():`
			`handlers.pop()`

			`class OuterHandler:`
			`def __init__(self, output):`
			`self.output = output`

			`def handle_line(self, line):`
			`check_for_new_fence(self.output, line)`

			`def done(self):`
			`pop()`

			`def check_for_new_fence(output, line):`
			`m = FENCE_RE.match(line)`
			`if m:`
			`fence = m.group('fence')`
			`lang = m.group('lang')`
			`handler = generic_handler(output, fence, lang)`
			`push(handler)`
			`else:`
			`output.append(line)`

			`def generic_handler(output, fence, lang):`
			`if lang in ('quote', 'quoted'):`
			`return QuoteHandler(output, fence)`
			`else:`
			`return CodeHandler(output, fence, lang)`

			`class QuoteHandler:`
			`def __init__(self, output, fence):`
			`self.output = output`
			`self.fence = fence`
			`self.lines = []`

			`def handle_line(self, line):`
			`if line.rstrip() == self.fence:`
			`self.done()`
			`else:`
			`check_for_new_fence(self.lines, line)`

			`def done(self):`
			`text = '\n'.join(self.lines)`
			`text = processor.format_quote(text)`
			`processed_lines = text.split('\n')`
			`self.output.append('')`
			`self.output.extend(processed_lines)`
			`self.output.append('')`
			`pop()`

			`class CodeHandler:`
			`def __init__(self, output, fence, lang):`
			`self.output = output`
			`self.fence = fence`
			`self.lang = lang`
			`self.lines = []`

			`def handle_line(self, line):`
			`if line.rstrip() == self.fence:`
			`self.done()`
			`else:`
			`self.lines.append(line)`

			`def done(self):`
			`text = '\n'.join(self.lines)`
			`text = processor.format_code(self.lang, text)`
			`text = processor.placeholder(text)`
			`processed_lines = text.split('\n')`
			`self.output.append('')`
			`self.output.extend(processed_lines)`
			`self.output.append('')`
			`pop()`

			`handler = OuterHandler(output)`
			`push(handler)`

			`for line in lines:`
			`handlers[-1].handle_line(line)`

			`while handlers:`
			`handlers[-1].done()`

			`# This fiddly handling of new lines at the end of our output was done to make`
			`# existing tests pass. Bugdown is just kind of funny when it comes to new lines,`
			`# but we could probably remove this hack.`
			`if len(output) > 2 and output[-2] != '':`
			`output.append('')`
			`return output`

Move langclass var into FencedBlockPreprocessor.format_code(). (imported from commit e468a8b6a12494facb0dfa97eff90e02cd14f169) 2013-11-20 19:48:44 +01:00			`def format_code(self, lang, text):`
			`langclass = ''`
			`if lang:`
			`langclass = LANG_TAG % (lang,)`

Extract FencedBlockPreprocessor.format_code(). (imported from commit 3c98b9a7391a71e9c55a7b5c97abd0ee74178b44) 2013-11-20 19:11:07 +01:00			`# Check for code hilite extension`
			`if not self.checked_for_codehilite:`
			`for ext in self.markdown.registeredExtensions:`
			`if isinstance(ext, CodeHiliteExtension):`
			`self.codehilite_conf = ext.config`
			`break`

			`self.checked_for_codehilite = True`

			`# If config is not empty, then the codehighlite extension`
			`# is enabled, so we call it to highlite the code`
			`if self.codehilite_conf:`
			`highliter = CodeHilite(text,`
			`force_linenos=self.codehilite_conf['force_linenos'][0],`
			`guess_lang=self.codehilite_conf['guess_lang'][0],`
			`css_class=self.codehilite_conf['css_class'][0],`
			`style=self.codehilite_conf['pygments_style'][0],`
			`lang=(lang or None),`
			`noclasses=self.codehilite_conf['noclasses'][0])`

			`code = highliter.hilite()`
			`else:`
			`code = CODE_WRAP % (langclass, self._escape(text))`

			`return code`
Refactor fence block code to never infinite loop (imported from commit f72cb182e4fc9c4e8003853276d8aa40b454d08f) 2013-01-29 16:14:30 +01:00
Extract FencedBlockPreprocessor.format_quote(). (imported from commit 59db5415d624402b37ade7fe5547b16f58ae565f) 2013-11-20 19:29:54 +01:00			`def format_quote(self, text):`
			`paragraphs = text.split("\n\n")`
			`quoted_paragraphs = []`
			`for paragraph in paragraphs:`
			`lines = paragraph.split("\n")`
			`quoted_paragraphs.append("\n".join("> " + line for line in lines if line != ''))`
			`return "\n\n".join(quoted_paragraphs)`

Extract FencedBlockPreprocessor.placeholder(). (imported from commit cf6dab74ae97a57498e1e1807c7c4f272f3f448b) 2013-11-20 21:03:57 +01:00			`def placeholder(self, code):`
			`return self.markdown.htmlStash.store(code, safe=True)`

Import GitHub version of Markdown fenced_code extension (imported from commit 929de2ba09a5c6dabed20c7f1b3ba319ba42b244) 2012-11-19 17:55:28 +01:00			`def _escape(self, txt):`
			`""" basic html escaping """`
			`txt = txt.replace('&', '&amp;')`
			`txt = txt.replace('<', '&lt;')`
			`txt = txt.replace('>', '&gt;')`
			`txt = txt.replace('"', '&quot;')`
			`return txt`


			`def makeExtension(configs=None):`
			`return FencedCodeExtension(configs=configs)`


			`if __name__ == "__main__":`
			`import doctest`
			`doctest.testmod()`