2012-11-19 17:55:28 +01:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
"""
|
|
|
|
Fenced Code Extension for Python Markdown
|
|
|
|
=========================================
|
|
|
|
|
|
|
|
This extension adds Fenced Code Blocks to Python-Markdown.
|
|
|
|
|
|
|
|
>>> import markdown
|
|
|
|
>>> text = '''
|
|
|
|
... A paragraph before a fenced code block:
|
|
|
|
...
|
|
|
|
... ~~~
|
|
|
|
... Fenced code block
|
|
|
|
... ~~~
|
|
|
|
... '''
|
|
|
|
>>> html = markdown.markdown(text, extensions=['fenced_code'])
|
|
|
|
>>> print html
|
|
|
|
<p>A paragraph before a fenced code block:</p>
|
|
|
|
<pre><code>Fenced code block
|
|
|
|
</code></pre>
|
|
|
|
|
|
|
|
Works with safe_mode also (we check this because we are using the HtmlStash):
|
|
|
|
|
|
|
|
>>> print markdown.markdown(text, extensions=['fenced_code'], safe_mode='replace')
|
|
|
|
<p>A paragraph before a fenced code block:</p>
|
|
|
|
<pre><code>Fenced code block
|
|
|
|
</code></pre>
|
|
|
|
|
|
|
|
Include tildes in a code block and wrap with blank lines:
|
|
|
|
|
|
|
|
>>> text = '''
|
|
|
|
... ~~~~~~~~
|
|
|
|
...
|
|
|
|
... ~~~~
|
|
|
|
... ~~~~~~~~'''
|
|
|
|
>>> print markdown.markdown(text, extensions=['fenced_code'])
|
|
|
|
<pre><code>
|
|
|
|
~~~~
|
|
|
|
</code></pre>
|
|
|
|
|
|
|
|
Language tags:
|
|
|
|
|
|
|
|
>>> text = '''
|
|
|
|
... ~~~~{.python}
|
|
|
|
... # Some python code
|
|
|
|
... ~~~~'''
|
|
|
|
>>> print markdown.markdown(text, extensions=['fenced_code'])
|
|
|
|
<pre><code class="python"># Some python code
|
|
|
|
</code></pre>
|
|
|
|
|
|
|
|
Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).
|
|
|
|
|
|
|
|
Project website: <http://packages.python.org/Markdown/extensions/fenced_code_blocks.html>
|
|
|
|
Contact: markdown@freewisdom.org
|
|
|
|
|
|
|
|
License: BSD (see ../docs/LICENSE for details)
|
|
|
|
|
|
|
|
Dependencies:
|
|
|
|
* [Python 2.4+](http://python.org)
|
|
|
|
* [Markdown 2.0+](http://packages.python.org/Markdown/)
|
|
|
|
* [Pygments (optional)](http://pygments.org)
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
import re
|
|
|
|
import markdown
|
2013-07-29 23:03:31 +02:00
|
|
|
from zerver.lib.bugdown.codehilite import CodeHilite, CodeHiliteExtension
|
2012-11-19 17:55:28 +01:00
|
|
|
|
|
|
|
# Global vars
|
Support arbitrarily nested fenced quote/code blocks.
Now we can nest fenced code/quote blocks inside of quote
blocks down to arbitrary depths. Code blocks are always leaves.
Fenced blocks start with at least three tildes or backticks,
and the clump of punctuation then becomes the terminator for
the block. If the user ends their message without terminators,
all blocks are automatically closed.
When inside a quote block, you can start another fenced block
with any header that doesn't match the end-string of the outer
block. (If you don't want to specify a language, then you
can change the number of backticks/tildes to avoid ambiguity.)
Most of the heavy lifting happens in FencedBlockPreprocessor.run().
The parser works by pushing handlers on to a stack and popping
them off when the ends of blocks are encountered. Parents communicate
with their children by passing in a simple Python list of strings
for the child to append to. Handlers also maintain their own
lists for their own content, and when their done() method is called,
they render their data as needed.
The handlers are objects returned by functions, and the handler
functions close on variables push, pop, and processor. The closure
style here makes the handlers pretty tightly coupled to the outer
run() method. If we wanted to move to a class-based style, the
tradeoff would be that the class instances would have to marshall
push/pop/processor etc., but we could test the components more
easily in isolation.
Dealing with blank lines is very fiddly inside of bugdown.
The new functionality here is captured in the test
BugdownTest.test_complexly_nested_quote().
(imported from commit 53886c8de74bdf2bbd3cef8be9de25f05bddb93c)
2013-11-20 23:25:48 +01:00
|
|
|
# Matches the opening line of a fenced block: a run of three or more tildes
# or backticks, optional spaces, then an optional language tag such as
# "py", ".py" or "{.javascript}".  The named group "fence" captures the run
# of punctuation and "lang" captures the bare language name ('' when the
# tag is absent).
FENCE_RE = re.compile(r"""
    # ~~~ or ```
    (?P<fence>
        ^(?:~{3,}|`{3,})
    )

    [ ]* # spaces

    (
        \{?\.?
        (?P<lang>
            [a-zA-Z0-9_+-]*
        ) # "py" or "javascript"
        \}?
    ) # language, like ".py" or "{javascript}"
    $
    """, re.VERBOSE)

# HTML wrapper for a code block: first slot is the (possibly empty) class
# attribute built from LANG_TAG, second slot is the escaped code text.
CODE_WRAP = '<pre><code%s>%s</code></pre>'
LANG_TAG = ' class="%s"'
|
|
|
|
|
|
|
|
class FencedCodeExtension(markdown.Extension):
    """Markdown extension that installs the fenced-block preprocessor."""

    def extendMarkdown(self, md, md_globals):
        """Register this extension and add FencedBlockPreprocessor to md.

        md is the Markdown instance being configured.  md_globals is part
        of the signature but is not used here.
        """
        md.registerExtension(self)

        # Newer versions of Python-Markdown (starting at 2.3?) have
        # a normalize_whitespace preprocessor that needs to go first.
        if 'normalize_whitespace' in md.preprocessors:
            position = '>normalize_whitespace'
        else:
            position = '_begin'

        preprocessor = FencedBlockPreprocessor(md)
        md.preprocessors.add('fenced_code_block', preprocessor, position)
|
2012-11-19 17:55:28 +01:00
|
|
|
|
|
|
|
|
|
|
|
class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
    """Preprocessor that rewrites fenced quote and code blocks.

    Lines matching FENCE_RE open a block.  A language of 'quote' or
    'quoted' opens a quote block, whose content may itself contain fenced
    blocks; any other language (or none) opens a code block, whose content
    is taken verbatim.  A block ends at a line that, with trailing
    whitespace stripped, equals the fence that opened it.
    """

    def __init__(self, md):
        markdown.preprocessors.Preprocessor.__init__(self, md)

        # The CodeHilite configuration is looked up on the first call to
        # format_code() and cached; an empty dict means "not enabled".
        self.checked_for_codehilite = False
        self.codehilite_conf = {}

    def run(self, lines):
        """Replace fenced blocks in lines and return the new list of lines.

        Code blocks are rendered to HTML, stored in the HtmlStash, and
        replaced by the stash placeholder.  Quote blocks are rewritten by
        format_quote().  Blocks still open when the input ends are closed
        automatically, innermost first.
        """
        output = []

        # The handlers below are closures over processor, push and pop.
        processor = self
        handlers = []

        def push(handler):
            handlers.append(handler)

        def pop():
            handlers.pop()

        class OuterHandler(object):
            """Handler for lines that are outside any fenced block."""

            def __init__(self, output):
                self.output = output

            def handle_line(self, line):
                check_for_new_fence(self.output, line)

            def done(self):
                pop()

        def check_for_new_fence(output, line):
            """Open a new block if line is a fence, else append it to output."""
            m = FENCE_RE.match(line)
            if m:
                fence = m.group('fence')
                lang = m.group('lang')
                handler = generic_handler(output, fence, lang)
                push(handler)
            else:
                output.append(line)

        def generic_handler(output, fence, lang):
            """Pick the handler class for a newly opened block."""
            if lang in ('quote', 'quoted'):
                return QuoteHandler(output, fence)
            else:
                return CodeHandler(output, fence, lang)

        class QuoteHandler(object):
            """Collects a quote block; nested fences open child handlers."""

            def __init__(self, output, fence):
                self.output = output
                self.fence = fence
                self.lines = []

            def handle_line(self, line):
                if line.rstrip() == self.fence:
                    self.done()
                else:
                    # Children append their rendered lines to self.lines.
                    check_for_new_fence(self.lines, line)

            def done(self):
                text = '\n'.join(self.lines)
                text = processor.format_quote(text)
                processed_lines = text.split('\n')
                # Surround the rendered block with blank lines.
                self.output.append('')
                self.output.extend(processed_lines)
                self.output.append('')
                pop()

        class CodeHandler(object):
            """Collects a code block verbatim; code blocks never nest."""

            def __init__(self, output, fence, lang):
                self.output = output
                self.fence = fence
                self.lang = lang
                self.lines = []

            def handle_line(self, line):
                if line.rstrip() == self.fence:
                    self.done()
                else:
                    self.lines.append(line)

            def done(self):
                text = '\n'.join(self.lines)
                text = processor.format_code(self.lang, text)
                # Swap the rendered HTML for its HtmlStash placeholder.
                text = processor.placeholder(text)
                processed_lines = text.split('\n')
                self.output.append('')
                self.output.extend(processed_lines)
                self.output.append('')
                pop()

        handler = OuterHandler(output)
        push(handler)

        for line in lines:
            handlers[-1].handle_line(line)

        # Close whatever is still open; each done() pops its own handler,
        # so this also removes the OuterHandler and terminates.
        while handlers:
            handlers[-1].done()

        # This fiddly handling of new lines at the end of our output was done to make
        # existing tests pass.  Bugdown is just kind of funny when it comes to new lines,
        # but we could probably remove this hack.
        if len(output) > 2 and output[-2] != '':
            output.append('')
        return output

    def format_code(self, lang, text):
        """Return the HTML for a code block with the given language and text.

        When a CodeHiliteExtension is registered on the Markdown instance
        and its config is non-empty, the text is rendered with CodeHilite.
        Otherwise it is HTML-escaped and wrapped in CODE_WRAP, with a class
        attribute when lang is non-empty.
        """
        # Check for code hilite extension (only once per preprocessor).
        if not self.checked_for_codehilite:
            for ext in self.markdown.registeredExtensions:
                if isinstance(ext, CodeHiliteExtension):
                    self.codehilite_conf = ext.config
                    break

            self.checked_for_codehilite = True

        # If config is not empty, then the codehighlite extension
        # is enabled, so we call it to highlite the code
        if self.codehilite_conf:
            highliter = CodeHilite(text,
                force_linenos=self.codehilite_conf['force_linenos'][0],
                guess_lang=self.codehilite_conf['guess_lang'][0],
                css_class=self.codehilite_conf['css_class'][0],
                style=self.codehilite_conf['pygments_style'][0],
                lang=(lang or None),
                noclasses=self.codehilite_conf['noclasses'][0])

            code = highliter.hilite()
        else:
            langclass = ''
            if lang:
                langclass = LANG_TAG % (lang,)
            code = CODE_WRAP % (langclass, self._escape(text))

        return code

    def format_quote(self, text):
        """Prefix every non-empty line of text with "> ".

        Paragraphs (separated by a blank line) are processed independently
        and rejoined with a blank line between them.
        """
        paragraphs = text.split("\n\n")
        quoted_paragraphs = []
        for paragraph in paragraphs:
            lines = paragraph.split("\n")
            quoted_paragraphs.append("\n".join("> " + line for line in lines if line != ''))
        return "\n\n".join(quoted_paragraphs)

    def placeholder(self, code):
        """Store code in the HtmlStash as safe HTML and return the result of store()."""
        return self.markdown.htmlStash.store(code, safe=True)

    def _escape(self, txt):
        """ basic html escaping """
        # The text ends up in the HtmlStash marked safe, so it must be
        # escaped here.  '&' goes first so the ampersands introduced by
        # the later replacements are not escaped a second time.
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        txt = txt.replace('"', '&quot;')
        return txt
|
|
|
|
|
|
|
|
|
|
|
|
def makeExtension(configs=None):
    """Build a FencedCodeExtension, forwarding configs unchanged."""
    extension = FencedCodeExtension(configs=configs)
    return extension
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # When executed as a script, run the doctest examples in this module.
    import doctest

    doctest.testmod()
|