markdown: Add data-codehilite-language attr for fenced code.

When converting fenced code markdown, we add the language (if specified)
as a data-attribute by tweaking the generated HTML. Doing so allows the
frontend to use this attribute to display a view-in-playground option
for code blocks.

We use pygments to get the lexer subclass name and use that instead of
directly using the language in the data-attribute. Doing so helps us
map different language aliases (like `js` and `javascript`) to a common
name (like `JavaScript`), and saves the client from dealing with
multiple tags corresponding to the same language.

The html structure for a message like this:

``` js
..content..
```

would now be:

<div class="codehilite" data-codehilite-language="JavaScript">
    <pre>..content..</pre>
</div>

Tests and fixtures amended.
This commit is contained in:
Sumanth V Rao 2020-09-06 12:11:37 +05:30 committed by Tim Abbott
parent e9d0bdea65
commit 033351609d
3 changed files with 27 additions and 4 deletions

View File

@ -80,7 +80,10 @@ from typing import Any, Dict, Iterable, List, Mapping, MutableSequence, Optional
import markdown
from django.utils.html import escape
from lxml import etree
from markdown.extensions.codehilite import CodeHilite, CodeHiliteExtension
from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound
from zerver.lib.exceptions import MarkdownRenderingException
from zerver.lib.tex import render_tex
@ -392,6 +395,24 @@ class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
else:
code = CODE_WRAP.format(langclass, self._escape(text))
# In order to display a "view-in-playground" option in the frontend,
# we need to know the language used in the codeblock. We tweak the HTML
# CodeHilite generates to add this language as a data-attribute.
if lang:
parsed_code = etree.HTML(code)
div_tag = parsed_code[0][0]
# We use the lexer subclass name instead of the raw lang, to avoid generating
# a different tag for each of the lang's aliases. Eg: `js` and `javascript`
# are both mapped to `JavaScript`. If no lexer with that alias is found, we
# fall back to using the lang text itself as the data-codehilite-language value.
try:
lexer_subclass_name = get_lexer_by_name(lang).name
except ClassNotFound:
lexer_subclass_name = lang
div_tag.attrib['data-codehilite-language'] = lexer_subclass_name
# lxml implicitly collapses empty tags like <span></span> into <span/>;
# specifying method="c14n" when converting to a string prevents that.
code = etree.tostring(div_tag, method="c14n").decode()
return code
def format_quote(self, text: str) -> str:

View File

@ -3,7 +3,7 @@
{
"name": "codeblock_hilite",
"input": "Hamlet said:\n~~~~.python \ndef speak(self):\n x = 1\n~~~~",
"expected_output": "<p>Hamlet said:</p>\n<div class=\"codehilite\"><pre><span></span><code><span class=\"k\">def</span> <span class=\"nf\">speak</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n <span class=\"n\">x</span> <span class=\"o\">=</span> <span class=\"mi\">1</span>\n</code></pre></div>",
"expected_output": "<p>Hamlet said:</p>\n<div class=\"codehilite\" data-codehilite-language=\"Python\"><pre><span></span><code><span class=\"k\">def</span> <span class=\"nf\">speak</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n <span class=\"n\">x</span> <span class=\"o\">=</span> <span class=\"mi\">1</span>\n</code></pre></div>",
"marked_expected_output": "<p>Hamlet said:</p>\n<div class=\"codehilite\"><pre><span></span><code>def speak(self):\n x = 1\n</code></pre></div>",
"text_content": "Hamlet said:\ndef speak(self):\n x = 1\n"
},
@ -786,13 +786,13 @@
{
"name": "tex_fenced_tex",
"input": "```tex\n\n\\pi \\textbf{ is not } 3.14\n```",
"expected_output": "<div class=\"codehilite\"><pre><span></span><code><span class=\"k\">\\pi</span> <span class=\"k\">\\textbf</span><span class=\"nb\">{</span> is not <span class=\"nb\">}</span> 3.14\n</code></pre></div>",
"expected_output": "<div class=\"codehilite\" data-codehilite-language=\"TeX\"><pre><span></span><code><span class=\"k\">\\pi</span> <span class=\"k\">\\textbf</span><span class=\"nb\">{</span> is not <span class=\"nb\">}</span> 3.14\n</code></pre></div>",
"marked_expected_output": "<div class=\"codehilite\"><pre><span></span><code>\\pi \\textbf{ is not } 3.14\n</code></pre></div>"
},
{
"name": "tex_fenced_latex",
"input": "```latex\n\n\\pi \\textbf{ is not } 3.14\n```",
"expected_output": "<div class=\"codehilite\"><pre><span></span><code><span class=\"k\">\\pi</span> <span class=\"k\">\\textbf</span><span class=\"nb\">{</span> is not <span class=\"nb\">}</span> 3.14\n</code></pre></div>",
"expected_output": "<div class=\"codehilite\" data-codehilite-language=\"TeX\"><pre><span></span><code><span class=\"k\">\\pi</span> <span class=\"k\">\\textbf</span><span class=\"nb\">{</span> is not <span class=\"nb\">}</span> 3.14\n</code></pre></div>",
"marked_expected_output": "<div class=\"codehilite\"><pre><span></span><code>\\pi \\textbf{ is not } 3.14\n</code></pre></div>"
},
{

View File

@ -1376,6 +1376,7 @@ class MarkdownTest(ZulipTestCase):
msg_without_language = markdown_convert_wrapper(text.format(''))
msg_with_quote = markdown_convert_wrapper(text.format('quote'))
msg_with_math = markdown_convert_wrapper(text.format('math'))
msg_with_none = markdown_convert_wrapper(text.format('none'))
# Render with default=javascript
do_set_realm_property(realm, 'default_code_block_language', 'javascript')
@ -1403,7 +1404,8 @@ class MarkdownTest(ZulipTestCase):
self.assertTrue(msg_with_python == msg_with_python_default_js == msg_without_language_default_py)
self.assertTrue(msg_with_quote == msg_without_language_default_quote)
self.assertTrue(msg_with_math == msg_without_language_default_math)
self.assertTrue(msg_without_language == msg_with_none_default_py == msg_without_language_final)
self.assertTrue(msg_without_language == msg_without_language_final)
self.assertTrue(msg_with_none == msg_with_none_default_py)
# Test checking inside nested quotes
nested_text = "````quote\n\n{}\n\n{}````".format(text.format('js'), text.format(''))