"""
Fenced Code Extension for Python Markdown
=========================================

This extension adds Fenced Code Blocks to Python-Markdown.

    >>> import markdown
    >>> text = '''
    ... A paragraph before a fenced code block:
    ...
    ... ~~~
    ... Fenced code block
    ... ~~~
    ... '''
    >>> html = markdown.markdown(text, extensions=['fenced_code'])
    >>> print html
    <p>A paragraph before a fenced code block:</p>
    <pre><code>Fenced code block
    </code></pre>

Works with safe_mode also (we check this because we are using the HtmlStash):

    >>> print markdown.markdown(text, extensions=['fenced_code'], safe_mode='replace')
    <p>A paragraph before a fenced code block:</p>
    <pre><code>Fenced code block
    </code></pre>

Include tildes in a code block and wrap with blank lines:

    >>> text = '''
    ... ~~~~~~~~
    ...
    ... ~~~~
    ... ~~~~~~~~'''
    >>> print markdown.markdown(text, extensions=['fenced_code'])
    <pre><code>
    ~~~~
    </code></pre>

Removes trailing whitespace from code blocks that cause horizontal scrolling

    >>> import markdown
    >>> text = '''
    ... A paragraph before a fenced code block:
    ...
    ... ~~~
    ... Fenced code block \t\t\t\t\t\t\t
    ... ~~~
    ... '''
    >>> html = markdown.markdown(text, extensions=['fenced_code'])
    >>> print html
    <p>A paragraph before a fenced code block:</p>
    <pre><code>Fenced code block
    </code></pre>

Language tags:

    >>> text = '''
    ... ~~~~{.python}
    ... # Some python code
    ... ~~~~'''
    >>> print markdown.markdown(text, extensions=['fenced_code'])
    <pre><code class="python"># Some python code
    </code></pre>

Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).

Project website: <https://python-markdown.github.io/extensions/fenced_code_blocks/>
Contact: markdown@freewisdom.org

License: BSD (see ../docs/LICENSE for details)

Dependencies:
* [Python 2.4+](http://python.org)
* [Markdown 2.0+](http://packages.python.org/Markdown/)
* [Pygments (optional)](http://pygments.org)

"""

import re
from typing import Any, Dict, Iterable, List, Mapping, MutableSequence, Optional

import markdown
from django.utils.html import escape
from lxml import etree
from markdown.extensions.codehilite import CodeHilite, CodeHiliteExtension
from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound

from zerver.lib.exceptions import MarkdownRenderingException
from zerver.lib.tex import render_tex

# Global vars

# Matches an opening fence line.  The three named groups are read by
# check_for_new_fence(): `fence` is the run of tildes/backticks (a block is
# closed by a line equal to exactly that run), `lang` is the optional
# language tag and `header` is any remaining text on the line.
FENCE_RE = re.compile("""
    # ~~~ or ```
    (?P<fence>
        ^(?:~{3,}|`{3,})
    )

    [ ]* # spaces

    (
        \\{?\\.?
        (?P<lang>
            [a-zA-Z0-9_+-./#]*
        ) # "py" or "javascript"
        \\}?
    ) # language, like ".py" or "{javascript}"
    [ ]* # spaces
    (
        \\{?\\.?
        (?P<header>
            [^~`]*
        )
        \\}?
    ) # header for features that use fenced block header syntax (like spoilers)
    $
    """, re.VERBOSE)


# Plain (non-Pygments) wrapper; formatted with (LANG_TAG or '', escaped code).
CODE_WRAP = '<pre><code{}>{}\n</code></pre>'
LANG_TAG = ' class="{}"'


def validate_curl_content(lines: List[str]) -> None:
    """Check that every curl command in a code block names its HTTP method.

    Only lines that start with ``curl`` are inspected.

    Raises:
        MarkdownRenderingException: for the first such line that lacks an
            ``-X``/``-sSX`` argument followed by GET, DELETE, PATCH or POST.
    """
    error_msg = """
Missing required -X argument in curl command:

{command}
""".strip()
    # Compiled once up front rather than once per line.
    method_re = re.compile(r'curl [-](sS)?X "?(GET|DELETE|PATCH|POST)"?')

    for line in lines:
        if line.startswith('curl') and method_re.search(line) is None:
            raise MarkdownRenderingException(error_msg.format(command=line.strip()))


# Maps a fence language tag to the validator run over that block's lines.
CODE_VALIDATORS = {
    'curl': validate_curl_content,
}


class FencedCodeExtension(markdown.Extension):
    """Markdown extension that installs FencedBlockPreprocessor."""

    def __init__(self, config: Mapping[str, Any] = {}) -> None:
        # `config` is only read here, never mutated, so the shared default
        # mapping is harmless.
        self.config = {
            'run_content_validators': [
                config.get('run_content_validators', False),
                'Boolean specifying whether to run content validation code in CodeHandler',
            ],
        }

        for key, value in config.items():
            self.setConfig(key, value)

    def extendMarkdown(self, md: markdown.Markdown) -> None:
        """ Add FencedBlockPreprocessor to the Markdown instance. """
        md.registerExtension(self)
        processor = FencedBlockPreprocessor(
            md, run_content_validators=self.config['run_content_validators'][0])
        md.preprocessors.register(processor, 'fenced_code_block', 25)


class BaseHandler:
    """Interface for the per-block line handlers kept on the processor's stack."""

    def handle_line(self, line: str) -> None:
        """Consume one input line."""
        raise NotImplementedError()

    def done(self) -> None:
        """Finish the block; implementations pop themselves off the stack."""
        raise NotImplementedError()


def generic_handler(processor: Any, output: MutableSequence[str],
                    fence: str, lang: str, header: str,
                    run_content_validators: bool=False,
                    default_language: Optional[str]=None) -> BaseHandler:
    """Return the handler matching the (case-insensitive) fence language.

    ``quote``/``quoted``, ``math`` and ``spoiler`` get dedicated handlers;
    everything else, including an empty language, is treated as code.
    """
    lang = lang.lower()
    if lang in ('quote', 'quoted'):
        return QuoteHandler(processor, output, fence, default_language)
    elif lang == 'math':
        return TexHandler(processor, output, fence)
    elif lang == 'spoiler':
        return SpoilerHandler(processor, output, fence, header)
    else:
        return CodeHandler(processor, output, fence, lang, run_content_validators)


def check_for_new_fence(processor: Any, output: MutableSequence[str], line: str,
                        run_content_validators: bool=False,
                        default_language: Optional[str]=None) -> None:
    """Open a nested block if `line` is a fence, else pass the line through.

    A matching line pushes a new handler onto `processor`; a non-matching
    line is appended to `output` unchanged.  When the fence carries no
    language, `default_language` (if any) is used in its place.
    """
    m = FENCE_RE.match(line)
    if m:
        fence = m.group('fence')
        lang = m.group('lang')
        header = m.group('header')
        if not lang and default_language:
            lang = default_language
        handler = generic_handler(processor, output, fence, lang, header,
                                  run_content_validators, default_language)
        processor.push(handler)
    else:
        output.append(line)


class OuterHandler(BaseHandler):
    """Bottom-of-stack handler for lines that are outside any fenced block."""

    def __init__(self, processor: Any, output: MutableSequence[str],
                 run_content_validators: bool=False,
                 default_language: Optional[str]=None) -> None:
        self.output = output
        self.processor = processor
        self.run_content_validators = run_content_validators
        self.default_language = default_language

    def handle_line(self, line: str) -> None:
        check_for_new_fence(self.processor, self.output, line,
                            self.run_content_validators, self.default_language)

    def done(self) -> None:
        self.processor.pop()


class CodeHandler(BaseHandler):
    """Collects the lines of a code block and emits a stashed HTML placeholder."""

    def __init__(self, processor: Any, output: MutableSequence[str],
                 fence: str, lang: str, run_content_validators: bool=False) -> None:
        self.processor = processor
        self.output = output
        self.fence = fence
        self.lang = lang
        self.lines: List[str] = []
        self.run_content_validators = run_content_validators

    def handle_line(self, line: str) -> None:
        if line.rstrip() == self.fence:
            self.done()
        else:
            # Trailing whitespace is dropped from every code line.
            self.lines.append(line.rstrip())

    def done(self) -> None:
        text = '\n'.join(self.lines)

        # run content validators (if any)
        if self.run_content_validators:
            validator = CODE_VALIDATORS.get(self.lang, lambda text: None)
            validator(self.lines)

        text = self.processor.format_code(self.lang, text)
        text = self.processor.placeholder(text)
        processed_lines = text.split('\n')
        self.output.append('')
        self.output.extend(processed_lines)
        self.output.append('')
        self.processor.pop()


class QuoteHandler(BaseHandler):
    """Collects a quote block, allowing nested fences, and emits `> ` lines."""

    def __init__(self, processor: Any, output: MutableSequence[str],
                 fence: str, default_language: Optional[str]=None) -> None:
        self.processor = processor
        self.output = output
        self.fence = fence
        self.lines: List[str] = []
        self.default_language = default_language

    def handle_line(self, line: str) -> None:
        if line.rstrip() == self.fence:
            self.done()
        else:
            # Nested blocks write their result into this handler's lines.
            check_for_new_fence(self.processor, self.lines, line,
                                default_language=self.default_language)

    def done(self) -> None:
        text = '\n'.join(self.lines)
        text = self.processor.format_quote(text)
        processed_lines = text.split('\n')
        self.output.append('')
        self.output.extend(processed_lines)
        self.output.append('')
        self.processor.pop()


class SpoilerHandler(BaseHandler):
    """Collects a spoiler block (header + content), allowing nested fences."""

    def __init__(self, processor: Any, output: MutableSequence[str],
                 fence: str, spoiler_header: str) -> None:
        self.processor = processor
        self.output = output
        self.fence = fence
        self.spoiler_header = spoiler_header
        self.lines: List[str] = []

    def handle_line(self, line: str) -> None:
        if line.rstrip() == self.fence:
            self.done()
        else:
            check_for_new_fence(self.processor, self.lines, line)

    def done(self) -> None:
        # A spoiler with no content produces no output at all.
        if self.lines:
            header = self.spoiler_header
            text = '\n'.join(self.lines)

            text = self.processor.format_spoiler(header, text)
            processed_lines = text.split('\n')
            self.output.append('')
            self.output.extend(processed_lines)
            self.output.append('')
        # Always leave the stack, even when nothing was emitted: run() keeps
        # calling done() on the top handler until the stack is empty, so a
        # handler that fails to pop itself would make that loop spin forever.
        self.processor.pop()


class TexHandler(BaseHandler):
    """Collects a math block and emits a stashed placeholder for its rendering."""

    def __init__(self, processor: Any, output: MutableSequence[str], fence: str) -> None:
        self.processor = processor
        self.output = output
        self.fence = fence
        self.lines: List[str] = []

    def handle_line(self, line: str) -> None:
        if line.rstrip() == self.fence:
            self.done()
        else:
            self.lines.append(line)

    def done(self) -> None:
        text = '\n'.join(self.lines)
        text = self.processor.format_tex(text)
        text = self.processor.placeholder(text)
        processed_lines = text.split('\n')
        self.output.append('')
        self.output.extend(processed_lines)
        self.output.append('')
        self.processor.pop()


class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
    """Preprocessor that rewrites fenced blocks using a stack of handlers."""

    def __init__(self, md: markdown.Markdown, run_content_validators: bool=False) -> None:
        markdown.preprocessors.Preprocessor.__init__(self, md)

        self.checked_for_codehilite = False
        self.run_content_validators = run_content_validators
        self.codehilite_conf: Dict[str, List[Any]] = {}
        # Reset at the start of every run(); defined here so push()/pop()
        # never hit a missing attribute.
        self.handlers: List[BaseHandler] = []

    def push(self, handler: BaseHandler) -> None:
        self.handlers.append(handler)

    def pop(self) -> None:
        self.handlers.pop()

    def run(self, lines: Iterable[str]) -> List[str]:
        """ Match and store Fenced Code Blocks in the HtmlStash. """

        output: List[str] = []

        processor = self
        self.handlers = []

        # The realm attribute is optional on the Markdown instance.
        default_language = None
        try:
            default_language = self.md.zulip_realm.default_code_block_language
        except AttributeError:
            pass
        handler = OuterHandler(processor, output, self.run_content_validators, default_language)
        self.push(handler)

        for line in lines:
            self.handlers[-1].handle_line(line)

        # Close any blocks still open at end of input, innermost first.
        while self.handlers:
            self.handlers[-1].done()

        # This fiddly handling of new lines at the end of our output was done to make
        # existing tests pass. Markdown is just kind of funny when it comes to new lines,
        # but we could probably remove this hack.
        if len(output) > 2 and output[-2] != '':
            output.append('')
        return output

    def format_code(self, lang: str, text: str) -> str:
        """Return the HTML for a code block, highlighted when CodeHilite is on.

        When `lang` is non-empty the outermost element also gets a
        ``data-code-language`` attribute.
        """
        if lang:
            langclass = LANG_TAG.format(lang)
        else:
            langclass = ''

        # Check for code hilite extension
        if not self.checked_for_codehilite:
            for ext in self.md.registeredExtensions:
                if isinstance(ext, CodeHiliteExtension):
                    self.codehilite_conf = ext.config
                    break

            self.checked_for_codehilite = True

        # If config is not empty, then the codehighlite extension
        # is enabled, so we call it to highlite the code
        if self.codehilite_conf:
            highliter = CodeHilite(text,
                                   linenums=self.codehilite_conf['linenums'][0],
                                   guess_lang=self.codehilite_conf['guess_lang'][0],
                                   css_class=self.codehilite_conf['css_class'][0],
                                   style=self.codehilite_conf['pygments_style'][0],
                                   use_pygments=self.codehilite_conf['use_pygments'][0],
                                   lang=(lang or None),
                                   noclasses=self.codehilite_conf['noclasses'][0])

            code = highliter.hilite().rstrip('\n')
        else:
            code = CODE_WRAP.format(langclass, self._escape(text))

        # To support our "view in playground" feature, the frontend
        # needs to know what Pygments language was used for
        # highlighting this code block.  We record this in a data
        # attribute attached to the outer `pre` element.
        # Unfortunately, the pygments API doesn't offer a way to add
        # this, so we need to do it in a post-processing step.
        if lang:
            parsed_code = etree.HTML(code)
            div_tag = parsed_code[0][0]
            # For the value of our data element, we get the lexer
            # subclass name instead of directly using the language,
            # since that canonicalizes aliases (Eg: `js` and
            # `javascript` will be mapped to `JavaScript`).
            try:
                code_language = get_lexer_by_name(lang).name
            except ClassNotFound:
                # If there isn't a Pygments lexer by this name, we
                # still tag it with the user's data-code-language
                # value, since this allows hooking up a "playground"
                # for custom "languages" that aren't known to Pygments.
                #
                # The escaping isn't strictly necessary, in that the
                # characters allowed for language values should all be
                # safe, but doing escaping here makes it easy to
                # reason about.
                code_language = self._escape(lang)

            div_tag.attrib['data-code-language'] = code_language
            # lxml implicitly converts tags like <span></span> into <span/>.
            # Specifying method="c14n" when converting to string prevents that.
            code = etree.tostring(div_tag, method="c14n").decode()
        return code

    def format_quote(self, text: str) -> str:
        """Prefix every non-empty line with ``> ``, keeping paragraph breaks."""
        paragraphs = text.split("\n\n")
        quoted_paragraphs = []
        for paragraph in paragraphs:
            lines = paragraph.split("\n")
            quoted_paragraphs.append("\n".join("> " + line for line in lines if line != ''))
        return "\n\n".join(quoted_paragraphs)

    def format_spoiler(self, header: str, text: str) -> str:
        """Wrap `header` and `text` in stashed spoiler markup.

        Only the wrapper HTML is stashed; the header and body stay in the
        output as plain text between the placeholders.
        """
        output = []
        # NOTE(review): these three literals were restored after their markup
        # was lost; confirm the class names against the frontend/CSS.
        header_div_open_html = '<div class="spoiler-block"><div class="spoiler-header">'
        end_header_start_content_html = '</div><div class="spoiler-content" aria-hidden="true">'
        footer_html = '</div></div>'

        output.append(self.placeholder(header_div_open_html))
        output.append(header)
        output.append(self.placeholder(end_header_start_content_html))
        output.append(text)
        output.append(self.placeholder(footer_html))
        return "\n\n".join(output)

    def format_tex(self, text: str) -> str:
        """Render each blank-line-separated paragraph of `text` as block TeX.

        A paragraph for which render_tex returns None is emitted as escaped
        source inside an error span instead.
        """
        paragraphs = text.split("\n\n")
        tex_paragraphs = []
        for paragraph in paragraphs:
            html = render_tex(paragraph, is_inline=False)
            if html is not None:
                tex_paragraphs.append(html)
            else:
                # NOTE(review): span markup restored after being lost; confirm
                # the class name against the frontend/CSS.
                tex_paragraphs.append('<span class="tex-error">' +
                                      escape(paragraph) + '</span>')
        return "\n\n".join(tex_paragraphs)

    def placeholder(self, code: str) -> str:
        """Store `code` in the Markdown HtmlStash and return what store() gives back."""
        return self.md.htmlStash.store(code)

    def _escape(self, txt: str) -> str:
        """ basic html escaping """
        # '&' must be replaced first so the entities added below survive.
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        txt = txt.replace('"', '&quot;')
        return txt


def makeExtension(*args: Any, **kwargs: Any) -> FencedCodeExtension:
    """Entry point used by Markdown; keyword arguments become the config."""
    return FencedCodeExtension(kwargs)


if __name__ == "__main__":
    import doctest
    doctest.testmod()