markdown: Render ordered lists using <ol> markup.

This brings us in line, and also allows us to style these more like
unordered lists, which is visually more appealing.

On the backend, we now use the default list blockprocessor + sane list
extension of python-markdown to get proper list markup; on the
frontend, we mostly return to upstream's code as they have followed
CommonMark on this issue.

Using <ol> here necessarily removes the behaviour of not renumbering
on lists written like 3, 4, 7; hopefully users will be OK with the
change.

Fixes #12822.
This commit is contained in:
Thomas Ip 2019-08-11 13:41:34 +08:00 committed by Tim Abbott
parent 4a176714e1
commit 574c35c0b8
8 changed files with 119 additions and 242 deletions

View File

@ -284,7 +284,7 @@ run_test('marked', () => {
{input: 'Some text first\n* a\n* list \n* here\n\nand then after',
expected: '<p>Some text first</p>\n<ul>\n<li>a</li>\n<li>list </li>\n<li>here</li>\n</ul>\n<p>and then after</p>'},
{input: '1. an\n2. ordered \n3. list',
expected: '<p>1. an<br>\n2. ordered<br>\n3. list</p>'},
expected: '<ol>\n<li>an</li>\n<li>ordered </li>\n<li>list</li>\n</ol>'},
{input: '\n~~~quote\nquote this for me\n~~~\nthanks\n',
expected: '<blockquote>\n<p>quote this for me</p>\n</blockquote>\n<p>thanks</p>'},
{input: 'This is a @**Cordelia Lear** mention',

View File

@ -341,82 +341,6 @@ exports.set_realm_filters = function (realm_filters) {
marked.InlineLexer.rules.zulip.realm_filters = marked_rules;
};
var preprocess_auto_olists = (function () {
var TAB_LENGTH = 2;
var re = /^( *)(\d+)\. +(.*)/;
function getIndent(match) {
return Math.floor(match[1].length / TAB_LENGTH);
}
function extendArray(arr, arr2) {
Array.prototype.push.apply(arr, arr2);
}
function renumber(mlist) {
if (!mlist.length) {
return [];
}
var startNumber = parseInt(mlist[0][2], 10);
var changeNumbers = _.every(mlist, function (m) {
return startNumber === parseInt(m[2], 10);
});
var counter = startNumber;
return _.map(mlist, function (m) {
var number = changeNumbers ? counter.toString() : m[2];
counter += 1;
return m[1] + number + '. ' + m[3];
});
}
return function (src) {
var newLines = [];
var currentList = [];
var currentIndent = 0;
_.each(src.split('\n'), function (line) {
var m = line.match(re);
var isNextItem = m && currentList.length && currentIndent === getIndent(m);
var isBlankLine = line.trim() === "";
if (!isNextItem && !isBlankLine) {
// This is a non-blank line that doesn't start with a
// bullet, so we're done with the previous numbered
// list and can start a new one.
extendArray(newLines, renumber(currentList));
currentList = [];
}
if (!m) {
// This line doesn't start with a bullet. If it's not
// between bullets of a list (i.e. `currentList =
// []`), this is just normal content outside a bulleted
// list and we can append it to `new_lines`.
if (!currentList.length) {
newLines.push(line);
}
// Otherwise, it's a blank line in between bullets,
// because if this was a bullet, `m` would be truthy,
// and if it wasn't blank, we could have terminated the
// list (see above). We can just skip this blank line
// syntax, as our bulleted list CSS styling will
// control vertical spacing between bullets.
} else if (isNextItem) {
currentList.push(m);
} else {
currentList = [m];
currentIndent = getIndent(m);
}
});
extendArray(newLines, renumber(currentList));
return newLines.join('\n');
};
}());
exports.initialize = function () {
function disable_markdown_regex(rules, name) {
@ -466,11 +390,6 @@ exports.initialize = function () {
return emoji.translate_emoticons_to_names(src);
}
// Disable ordered lists
// We used GFM + tables, so replace the list start regex for that ruleset
// We remove the |[\d+]\. that matches the numbering in a numbered list
marked.Lexer.rules.tables.list = /^( *)((?:\*)) [\s\S]+?(?:\n+(?=(?: *[\-*_]){3,} *(?:\n+|$))|\n{2,}(?! )(?!\1(?:\*) )\n*|\s*$)/;
// Disable lheadings
// We only keep the # Heading format.
disable_markdown_regex(marked.Lexer.rules.tables, 'lheading');
@ -517,7 +436,6 @@ exports.initialize = function () {
renderer: r,
preprocessors: [
preprocess_code_blocks,
preprocess_auto_olists,
preprocess_translate_emoticons,
],
});

View File

@ -25,7 +25,7 @@
/* Ensure ordered lists are nicely centered in 1-line messages */
ol {
margin: 2px 0 5px 6px;
margin: 2px 0 5px 20px;
}
/* Swap the left and right margins of ordered list for Right-To-Left languages */

View File

@ -26,8 +26,7 @@ var block = {
text: /^[^\n]+/
};
// Zulip modification: Remove numbers from this pattern.
block.bullet = /(?:[*+-])/;
block.bullet = /(?:[*+-]|\d{1,9}\.)/;
block.item = /^( *)(bull) [^\n]*(?:\n(?!\1bull )[^\n]*)*/;
block.item = replace(block.item, 'gm')
(/bull/g, block.bullet)
@ -167,9 +166,12 @@ Lexer.prototype.token = function(src, top, bq) {
, bull
, b
, item
, listStart
, listItems
, space
, i
, l;
, l
, isordered;
while (src) {
// newline
@ -284,15 +286,21 @@ Lexer.prototype.token = function(src, top, bq) {
if (cap = this.rules.list.exec(src)) {
src = src.substring(cap[0].length);
bull = cap[2];
isordered = bull.length > 1;
this.tokens.push({
listStart = {
type: 'list_start',
ordered: bull.length > 1
});
ordered: isordered,
start: isordered ? +bull : '',
loose: false
};
this.tokens.push(listStart);
// Get each top-level item.
cap = cap[0].match(this.rules.item);
listItems = [];
next = false;
l = cap.length;
i = 0;
@ -303,7 +311,7 @@ Lexer.prototype.token = function(src, top, bq) {
// Remove the list item's bullet
// so it is seen as the next token.
space = item.length;
item = item.replace(/^ *([*+-]|\d+\.) +/, '');
item = item.replace(/^ *([*+-]|\d+\.) */, '');
// Outdent whatever the
// list item contains. Hacky.
@ -316,9 +324,10 @@ Lexer.prototype.token = function(src, top, bq) {
// Determine whether the next list item belongs here.
// Backpedal if it does not belong in this list.
if (this.options.smartLists && i !== l - 1) {
if (i !== l - 1) {
b = block.bullet.exec(cap[i + 1])[0];
if (bull !== b && !(bull.length > 1 && b.length > 1)) {
if (bull.length > 1 ? b.length === 1
: (b.length > 1 || (this.options.smartLists && b !== bull))) {
src = cap.slice(i + 1).join('\n') + src;
i = l - 1;
}
@ -333,20 +342,34 @@ Lexer.prototype.token = function(src, top, bq) {
if (!loose) loose = next;
}
this.tokens.push({
type: loose
? 'loose_item_start'
: 'list_item_start'
});
if (loose) {
listStart.loose = true;
}
t = {
type: 'list_item_start',
loose: loose
};
listItems.push(t);
this.tokens.push(t);
// Recurse.
this.token(item, false, bq);
this.token(item, false);
this.tokens.push({
type: 'list_item_end'
});
}
if (listStart.loose) {
l = listItems.length;
i = 0;
for (; i < l; i++) {
listItems[i].loose = true;
}
}
this.tokens.push({
type: 'list_end'
});
@ -1031,9 +1054,10 @@ Renderer.prototype.hr = function() {
return this.options.xhtml ? '<hr/>\n' : '<hr>\n';
};
Renderer.prototype.list = function(body, ordered) {
var type = ordered ? 'ol' : 'ul';
return '<' + type + '>\n' + body + '</' + type + '>\n';
Renderer.prototype.list = function(body, ordered, start) {
var type = ordered ? 'ol' : 'ul',
startatt = (ordered && start !== 1) ? (' start="' + start + '"') : '';
return '<' + type + startatt + '>\n' + body + '</' + type + '>\n';
};
Renderer.prototype.listitem = function(text) {
@ -1282,34 +1306,27 @@ Parser.prototype.tok = function() {
}
case 'list_start': {
var body = ''
, ordered = this.token.ordered;
, ordered = this.token.ordered
, start = this.token.start;
while (this.next().type !== 'list_end') {
body += this.tok();
}
return this.renderer.list(body, ordered);
return this.renderer.list(body, ordered, start);
}
case 'list_item_start': {
var body = '';
var body = ''
, loose = this.token.loose;
while (this.next().type !== 'list_item_end') {
body += this.token.type === 'text'
body += !loose && this.token.type === 'text'
? this.parseText()
: this.tok();
}
return this.renderer.listitem(body);
}
case 'loose_item_start': {
var body = '';
while (this.next().type !== 'list_item_end') {
body += this.tok();
}
return this.renderer.listitem(body);
}
case 'html': {
var html = !this.token.pre && !this.options.pedantic
? this.inline.output(this.token.text)

View File

@ -27,7 +27,7 @@ import requests
from django.conf import settings
from django.db.models import Q
from markdown.extensions import codehilite, nl2br, tables
from markdown.extensions import codehilite, nl2br, tables, sane_lists
from zerver.lib.bugdown import fenced_code
from zerver.lib.bugdown.fenced_code import FENCE_RE
from zerver.lib.camo import get_camo_url
@ -1421,24 +1421,24 @@ class AutoLink(CompiledPattern):
db_data = self.markdown.zulip_db_data
return url_to_a(db_data, url)
class UListProcessor(markdown.blockprocessors.UListProcessor):
""" Process unordered list blocks.
Based on markdown.blockprocessors.UListProcessor, but does not accept
'+' or '-' as a bullet character."""
TAG = 'ul'
RE = re.compile('^[ ]{0,3}[*][ ]+(.*)')
class OListProcessor(sane_lists.SaneOListProcessor):
def __init__(self, parser: Any) -> None:
# HACK: Set the tab length to 2 just for the initialization of
# this class, so that bulleted lists (and only bulleted lists)
# work off 2-space indentation.
parser.markdown.tab_length = 2
super().__init__(parser)
parser.markdown.tab_length = 4
class UListProcessor(sane_lists.SaneUListProcessor):
""" Does not accept '+' or '-' as a bullet character. """
def __init__(self, parser: Any) -> None:
parser.markdown.tab_length = 2
super().__init__(parser)
parser.markdown.tab_length = 4
self.RE = re.compile('^[ ]{0,%d}[*][ ]+(.*)' % (self.tab_length - 1,))
self.CHILD_RE = re.compile(r'^[ ]{0,%d}(([*]))[ ]+(.*)' %
(self.tab_length - 1,))
class ListIndentProcessor(markdown.blockprocessors.ListIndentProcessor):
""" Process unordered list blocks.
@ -1482,16 +1482,15 @@ class BlockQuoteProcessor(markdown.blockprocessors.BlockQuoteProcessor):
# And then run the upstream processor's code for removing the '>'
return super().clean(line)
class BugdownUListPreprocessor(markdown.preprocessors.Preprocessor):
""" Allows unordered list blocks that come directly after a
paragraph to be rendered as an unordered list
class BugdownListPreprocessor(markdown.preprocessors.Preprocessor):
""" Allows list blocks that come directly after another block
to be rendered as a list.
Detects paragraphs that have a matching list item that comes
directly after a line of text, and inserts a newline between
to satisfy Markdown"""
LI_RE = re.compile('^[ ]{0,3}[*][ ]+(.*)', re.MULTILINE)
HANGING_ULIST_RE = re.compile('^.+\\n([ ]{0,3}[*][ ]+.*)', re.MULTILINE)
LI_RE = re.compile(r'^[ ]{0,3}(\*|\d\.)[ ]+(.*)', re.MULTILINE)
def run(self, lines: List[str]) -> List[str]:
""" Insert a newline between a paragraph and ulist if missing """
@ -1509,91 +1508,17 @@ class BugdownUListPreprocessor(markdown.preprocessors.Preprocessor):
fence = None
# If we're not in a fenced block and we detect an upcoming list
# hanging off a paragraph, add a newline
if (not fence and lines[i] and
self.LI_RE.match(lines[i+1]) and
not self.LI_RE.match(lines[i])):
# hanging off any block (including a list of another type), add
# a newline.
li1 = self.LI_RE.match(lines[i])
li2 = self.LI_RE.match(lines[i+1])
if not fence and lines[i]:
if (li2 and not li1) or (li1 and li2 and
(len(li1.group(1)) == 1) != (len(li2.group(1)) == 1)):
copy.insert(i+inserts+1, '')
inserts += 1
return copy
class AutoNumberOListPreprocessor(markdown.preprocessors.Preprocessor):
""" Finds a sequence of lines numbered by the same number"""
RE = re.compile(r'^([ ]*)(\d+)\.[ ]+(.*)')
TAB_LENGTH = 2
def run(self, lines: List[str]) -> List[str]:
new_lines = [] # type: List[str]
current_list = [] # type: List[Match[str]]
current_indent = 0
for line in lines:
m = self.RE.match(line)
# Remember if this line is a continuation of already started list
is_next_item = (m and current_list
and current_indent == len(m.group(1)) // self.TAB_LENGTH)
is_blank_line = line.strip() == ""
if not is_next_item and not is_blank_line:
# This is a non-blank line that doesn't start with a
# bullet, so we're done with the previous numbered
# list and can start a new one.
new_lines.extend(self.renumber(current_list))
current_list = []
if not m:
# This line doesn't start with a bullet. If it's not
# between bullets of a list (i.e. `current_list =
# []`), this is just normal content outside a bulleted
# list and we can append it to `new_lines`.
if not current_list:
# Ordinary line
new_lines.append(line)
# Otherwise, it's a blank line in between bullets,
# because if this was a bullet, `m` would be truthy,
# and if it wasn't blank, we could have terminated the
# list (see above). We can just skip this blank line
# syntax, as our bulleted list CSS styling will
# control vertical spacing between bullets.
elif is_next_item:
# Another list item
current_list.append(m)
else:
# First list item
current_list = [m]
current_indent = len(m.group(1)) // self.TAB_LENGTH
new_lines.extend(self.renumber(current_list))
return new_lines
def renumber(self, mlist: List[Match[str]]) -> List[str]:
if not mlist:
return []
start_number = int(mlist[0].group(2))
# Change numbers only if every one is the same
change_numbers = True
for m in mlist:
if int(m.group(2)) != start_number:
change_numbers = False
break
lines = [] # type: List[str]
counter = start_number
for m in mlist:
number = str(counter) if change_numbers else m.group(2)
lines.append('%s%s. %s' % (m.group(1), number, m.group(3)))
counter += 1
return lines
# Name for the outer capture group we use to separate whitespace and
# other delimiters from the actual content. This value won't be an
# option in user-entered capture groups.
@ -1874,8 +1799,7 @@ class Bugdown(markdown.Markdown):
# html_block - insecure
# reference - references don't make sense in a chat context.
preprocessors = markdown.util.Registry()
preprocessors.register(AutoNumberOListPreprocessor(self), 'auto_number_olist', 40)
preprocessors.register(BugdownUListPreprocessor(self), 'hanging_ulists', 35)
preprocessors.register(BugdownListPreprocessor(self), 'hanging_lists', 35)
preprocessors.register(markdown.preprocessors.NormalizeWhitespace(self), 'normalize_whitespace', 30)
preprocessors.register(fenced_code.FencedBlockPreprocessor(self), 'fenced_code_block', 25)
preprocessors.register(AlertWordsNotificationProcessor(self), 'custom_text_notifications', 20)
@ -1897,6 +1821,7 @@ class Bugdown(markdown.Markdown):
parser.blockprocessors.register(HashHeaderProcessor(parser), 'hashheader', 78)
# We get priority 75 from 'table' extension
parser.blockprocessors.register(markdown.blockprocessors.HRProcessor(parser), 'hr', 70)
parser.blockprocessors.register(OListProcessor(parser), 'olist', 68)
parser.blockprocessors.register(UListProcessor(parser), 'ulist', 65)
parser.blockprocessors.register(ListIndentProcessor(parser), 'indent', 60)
parser.blockprocessors.register(BlockQuoteProcessor(parser), 'quote', 55)

View File

@ -489,7 +489,23 @@ def get_mobile_push_content(rendered_content: str) -> str:
quote_text += '\n'
return quote_text
def render_olist(ol: lxml.html.HtmlElement) -> str:
items = []
counter = int(ol.get('start')) if ol.get('start') else 1
nested_levels = len(list(ol.iterancestors('ol')))
indent = ('\n' + ' ' * nested_levels) if nested_levels else ''
for li in ol:
items.append(indent + str(counter) + '. ' + process(li).strip())
counter += 1
return '\n'.join(items)
def process(elem: lxml.html.HtmlElement) -> str:
plain_text = ''
if elem.tag == 'ol':
plain_text = render_olist(elem)
else:
plain_text = get_text(elem)
sub_text = ''
for child in elem:

View File

@ -292,27 +292,27 @@
},
{
"name": "numbered_list",
"input": "1. A\n 2. B",
"expected_output": "<p>1. A<br>\n 2. B</p>",
"text_content": "1. A\n 2. B"
"input": "1. A\n2. B\n 3. C",
"expected_output": "<ol>\n<li>A</li>\n<li>B<ol start=\"3\">\n<li>C</li>\n</ol>\n</li>\n</ol>",
"text_content": "1. A\n2. B\n 3. C"
},
{
"name": "auto_renumbered_list",
"name": "auto_renumbered_list_trick",
"input": "1. A\n1. B\n 1. C\n1. D",
"expected_output": "<p>1. A<br>\n2. B<br>\n 3. C<br>\n4. D</p>",
"text_content": "1. A\n2. B\n 3. C\n4. D"
"expected_output": "<ol>\n<li>A</li>\n<li>B<ol>\n<li>C</li>\n</ol>\n</li>\n<li>D</li>\n</ol>",
"text_content": "1. A\n2. B\n 1. C\n3. D"
},
{
"name": "auto_renumbered_list_from",
"input": "3. A\n3. B\n3. C\n3. D",
"expected_output": "<p>3. A<br>\n4. B<br>\n5. C<br>\n6. D</p>",
"expected_output": "<ol start=\"3\">\n<li>A</li>\n<li>B</li>\n<li>C</li>\n<li>D</li>\n</ol>",
"text_content": "3. A\n4. B\n5. C\n6. D"
},
{
"name": "not_auto_renumbered_list",
"name": "two_auto_renumbered_lists",
"input": "1. A\n3. B\n 2. C\n1. D",
"expected_output": "<p>1. A<br>\n3. B<br>\n 2. C<br>\n1. D</p>",
"text_content": "1. A\n3. B\n 2. C\n1. D"
"expected_output": "<ol>\n<li>A</li>\n<li>B<ol start=\"2\">\n<li>C</li>\n</ol>\n</li>\n<li>D</li>\n</ol>",
"text_content": "1. A\n2. B\n 2. C\n3. D"
},
{
"name": "linkify_interference",
@ -775,8 +775,9 @@
{
"name": "auto_renumbered_list_blankline",
"input": "1. A\n\n1. B\n\n1. C\n\n1. D\nordinary paragraph\n1. AA\n\n1. BB",
"expected_output": "<p>1. A<br>\n2. B<br>\n3. C<br>\n4. D<br>\nordinary paragraph<br>\n1. AA<br>\n2. BB</p>",
"text_content": "1. A\n2. B\n3. C\n4. D\nordinary paragraph\n1. AA\n2. BB"
"expected_output": "<ol>\n<li>\n<p>A</p>\n</li>\n<li>\n<p>B</p>\n</li>\n<li>\n<p>C</p>\n</li>\n<li>\n<p>D<br>\nordinary paragraph</p>\n</li>\n<li>\n<p>AA</p>\n</li>\n<li>\n<p>BB</p>\n</li>\n</ol>",
"marked_expected_output": "<ol>\n<li><p>A</p>\n</li>\n<li><p>B</p>\n</li>\n<li><p>C</p>\n</li>\n<li><p>D<br>\nordinary paragraph</p>\n</li>\n<li><p>AA</p>\n</li>\n<li><p>BB</p>\n</li>\n</ol>",
"text_content": "1. A\n2. B\n3. C\n4. D\nordinary paragraph\n5. AA\n6. BB"
}
],
"linkify_tests": [

View File

@ -463,7 +463,7 @@ class BugdownTest(ZulipTestCase):
converted = render_markdown(msg, content)
self.assertEqual(converted, expected)
content = '>- http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg\n\nAwesome!'
content = '>* http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg\n\nAwesome!'
expected = '<blockquote>\n<ul>\n<li><a href="http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg" target="_blank" title="http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg">http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg</a></li>\n</ul>\n</blockquote>\n<p>Awesome!</p>'
sender_user_profile = self.example_user('othello')
msg = Message(sender=sender_user_profile, sending_client=get_client("test"))