From ae58ed5a74d9fb902bc5f746086cf0400009abcf Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Mon, 14 Sep 2020 21:43:56 -0700 Subject: [PATCH] markdown: Tweak data-code-language testing and comments. This should make it clearer the precise decisions we've made about the intended semantics of this feature. --- zerver/lib/markdown/fenced_code.py | 33 ++++++++++++------- .../tests/fixtures/markdown_test_cases.json | 7 ++++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/zerver/lib/markdown/fenced_code.py b/zerver/lib/markdown/fenced_code.py index 6e3f3cfb8c..56093b5f95 100644 --- a/zerver/lib/markdown/fenced_code.py +++ b/zerver/lib/markdown/fenced_code.py @@ -395,23 +395,32 @@ class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor): else: code = CODE_WRAP.format(langclass, self._escape(text)) - # In order to display a "view-in-playground" option in the frontend, - # we need to know the language used in the codeblock. We tweak the HTML - # CodeHilite generates to add this language as a data-attribute. + # To support our "view in playground" feature, the frontend + # needs to know what Pygments language was used for + # highlighting this code block. We record this in a data + # attribute attached to the outer `pre` element. + # Unfortunately, the pygments API doesn't offer a way to add + # this, so we need to do it in a post-processing step. if lang: parsed_code = etree.HTML(code) div_tag = parsed_code[0][0] - # We get the lexer subclass name instead of directly processing the lang, to avoid - # different tags being generated for each of the lang's alias. Eg: `js` and `javascript` - # would now be mapped to `JavaScript`. In case no lexer with that alias is found, we - # return back the text, wrapped in a data-codehilite tag. + + # For the value of our data element, we get the lexer + # subclass name instead of directly using the language, + # since that canonicalizes aliases (Eg: `js` and + # `javascript` will be mapped to `JavaScript`). 
try: - lexer_subclass_name = get_lexer_by_name(lang).name + code_language = get_lexer_by_name(lang).name except ClassNotFound: - lexer_subclass_name = lang - div_tag.attrib['data-code-language'] = lexer_subclass_name - # Lxml implicitly converts tags like <span></span> into <span/> - # specifying method="c14n" when converting to string, prevents that. + # If there isn't a Pygments lexer by this name, we + # still tag it with the user's data-code-language + # value, since this allows hooking up a "playground" + # for custom "languages" that aren't known to Pygments. + code_language = lang + + div_tag.attrib['data-code-language'] = code_language + # lxml implicitly converts tags like <span></span> into <span/>. + # Specifying method="c14n" when converting to string prevents that. code = etree.tostring(div_tag, method="c14n").decode() return code diff --git a/zerver/tests/fixtures/markdown_test_cases.json b/zerver/tests/fixtures/markdown_test_cases.json index 455b8b0514..726605e5f6 100644 --- a/zerver/tests/fixtures/markdown_test_cases.json +++ b/zerver/tests/fixtures/markdown_test_cases.json @@ -7,6 +7,13 @@ "marked_expected_output": "

Hamlet said:

\n
def speak(self):\n    x = 1\n
", "text_content": "Hamlet said:\ndef speak(self):\n x = 1\n" }, + { + "name": "codeblock_hilite", + "input": "``` inventedlanguage\ndef speak(self):\n x = 1\n```", + "expected_output": "
def speak(self):\n    x = 1\n
", + "marked_expected_output": "
def speak(self):\n    x = 1\n
", + "text_content": "def speak(self):\n x = 1\n" + }, { "name": "ampampamp", "input": "& & &\n~~~~\n& & &\n~~~~\n & & &",