diff --git a/frontend_tests/node_tests/markdown.js b/frontend_tests/node_tests/markdown.js index ad937a55f2..19c4699afb 100644 --- a/frontend_tests/node_tests/markdown.js +++ b/frontend_tests/node_tests/markdown.js @@ -284,7 +284,7 @@ run_test('marked', () => { {input: 'Some text first\n* a\n* list \n* here\n\nand then after', expected: '

Some text first

\n\n

and then after

'}, {input: '1. an\n2. ordered \n3. list', - expected: '

1. an
\n2. ordered
\n3. list

'}, + expected: '
    \n
  1. an
  2. \n
  3. ordered
  4. \n
  5. list
  6. \n
'}, {input: '\n~~~quote\nquote this for me\n~~~\nthanks\n', expected: '
\n

quote this for me

\n
\n

thanks

'}, {input: 'This is a @**Cordelia Lear** mention', diff --git a/static/js/markdown.js b/static/js/markdown.js index 0b15e5e445..e2c5fd9050 100644 --- a/static/js/markdown.js +++ b/static/js/markdown.js @@ -341,82 +341,6 @@ exports.set_realm_filters = function (realm_filters) { marked.InlineLexer.rules.zulip.realm_filters = marked_rules; }; -var preprocess_auto_olists = (function () { - var TAB_LENGTH = 2; - var re = /^( *)(\d+)\. +(.*)/; - - function getIndent(match) { - return Math.floor(match[1].length / TAB_LENGTH); - } - - function extendArray(arr, arr2) { - Array.prototype.push.apply(arr, arr2); - } - - function renumber(mlist) { - if (!mlist.length) { - return []; - } - - var startNumber = parseInt(mlist[0][2], 10); - var changeNumbers = _.every(mlist, function (m) { - return startNumber === parseInt(m[2], 10); - }); - - var counter = startNumber; - return _.map(mlist, function (m) { - var number = changeNumbers ? counter.toString() : m[2]; - counter += 1; - return m[1] + number + '. ' + m[3]; - }); - } - - return function (src) { - var newLines = []; - var currentList = []; - var currentIndent = 0; - - _.each(src.split('\n'), function (line) { - var m = line.match(re); - var isNextItem = m && currentList.length && currentIndent === getIndent(m); - var isBlankLine = line.trim() === ""; - if (!isNextItem && !isBlankLine) { - // This is a non-blank line that doesn't start with a - // bullet, so we're done with the previous numbered - // list and can start a new one. - extendArray(newLines, renumber(currentList)); - currentList = []; - } - - if (!m) { - // This line doesn't start with a bullet. If it's not - // between bullets of a list (i.e. `currentList = - // []`), this is just normal content outside a bulleted - // list and we can append it to `new_lines`. - if (!currentList.length) { - newLines.push(line); - } - - // Otherwise, it's a blank line in between bullets, - // because if this was a bullet, `m` would be truthy, - // and if it wasn't blank, we could have terminated the - // list (see above). We can just skip this blank line - // syntax, as our bulleted list CSS styling will - // control vertical spacing between bullets. - } else if (isNextItem) { - currentList.push(m); - } else { - currentList = [m]; - currentIndent = getIndent(m); - } - }); - - extendArray(newLines, renumber(currentList)); - - return newLines.join('\n'); - }; -}()); - exports.initialize = function () { function disable_markdown_regex(rules, name) { @@ -466,11 +390,6 @@ exports.initialize = function () { return emoji.translate_emoticons_to_names(src); } - // Disable ordered lists - // We used GFM + tables, so replace the list start regex for that ruleset - // We remove the |[\d+]\. that matches the numbering in a numbered list - marked.Lexer.rules.tables.list = /^( *)((?:\*)) [\s\S]+?(?:\n+(?=(?: *[\-*_]){3,} *(?:\n+|$))|\n{2,}(?! )(?!\1(?:\*) )\n*|\s*$)/; - // Disable lheadings // We only keep the # Heading format. disable_markdown_regex(marked.Lexer.rules.tables, 'lheading'); @@ -517,7 +436,6 @@ exports.initialize = function () { renderer: r, preprocessors: [ preprocess_code_blocks, - preprocess_auto_olists, preprocess_translate_emoticons, ], }); diff --git a/static/styles/rendered_markdown.scss b/static/styles/rendered_markdown.scss index ba96d6d348..586ccdacd0 100644 --- a/static/styles/rendered_markdown.scss +++ b/static/styles/rendered_markdown.scss @@ -25,7 +25,7 @@ /* Ensure ordered lists are nicely centered in 1-line messages */ ol { - margin: 2px 0 5px 6px; + margin: 2px 0 5px 20px; } /* Swap the left and right margins of ordered list for Right-To-Left languages */ diff --git a/static/third/marked/lib/marked.js b/static/third/marked/lib/marked.js index 16d44a0f4a..c8777267a7 100644 --- a/static/third/marked/lib/marked.js +++ b/static/third/marked/lib/marked.js @@ -26,8 +26,7 @@ var block = { text: /^[^\n]+/ }; -// Zulip modification: Remove numbers from this pattern. -block.bullet = /(?:[*+-])/; +block.bullet = /(?:[*+-]|\d{1,9}\.)/; block.item = /^( *)(bull) [^\n]*(?:\n(?!\1bull )[^\n]*)*/; block.item = replace(block.item, 'gm') (/bull/g, block.bullet) @@ -167,9 +166,12 @@ Lexer.prototype.token = function(src, top, bq) { , bull , b , item + , listStart + , listItems , space , i - , l; + , l + , isordered; while (src) { // newline @@ -284,15 +286,21 @@ Lexer.prototype.token = function(src, top, bq) { if (cap = this.rules.list.exec(src)) { src = src.substring(cap[0].length); bull = cap[2]; + isordered = bull.length > 1; - this.tokens.push({ + listStart = { type: 'list_start', - ordered: bull.length > 1 - }); + ordered: isordered, + start: isordered ? +bull : '', + loose: false + }; + + this.tokens.push(listStart); // Get each top-level item. cap = cap[0].match(this.rules.item); + listItems = []; next = false; l = cap.length; i = 0; @@ -303,7 +311,7 @@ Lexer.prototype.token = function(src, top, bq) { // Remove the list item's bullet // so it is seen as the next token. space = item.length; - item = item.replace(/^ *([*+-]|\d+\.) +/, ''); + item = item.replace(/^ *([*+-]|\d+\.) */, ''); // Outdent whatever the // list item contains. Hacky. @@ -316,9 +324,10 @@ Lexer.prototype.token = function(src, top, bq) { // Determine whether the next list item belongs here. // Backpedal if it does not belong in this list. - if (this.options.smartLists && i !== l - 1) { + if (i !== l - 1) { b = block.bullet.exec(cap[i + 1])[0]; - if (bull !== b && !(bull.length > 1 && b.length > 1)) { + if (bull.length > 1 ? b.length === 1 + : (b.length > 1 || (this.options.smartLists && b !== bull))) { src = cap.slice(i + 1).join('\n') + src; i = l - 1; } @@ -333,20 +342,34 @@ Lexer.prototype.token = function(src, top, bq) { if (!loose) loose = next; } - this.tokens.push({ - type: loose - ? 'loose_item_start' - : 'list_item_start' - }); + if (loose) { + listStart.loose = true; + } + + t = { + type: 'list_item_start', + loose: loose + }; + + listItems.push(t); + this.tokens.push(t); // Recurse. - this.token(item, false, bq); + this.token(item, false); this.tokens.push({ type: 'list_item_end' }); } + if (listStart.loose) { + l = listItems.length; + i = 0; + for (; i < l; i++) { + listItems[i].loose = true; + } + } + this.tokens.push({ type: 'list_end' }); @@ -1031,9 +1054,10 @@ Renderer.prototype.hr = function() { return this.options.xhtml ? '
\n' : '
\n'; }; -Renderer.prototype.list = function(body, ordered) { - var type = ordered ? 'ol' : 'ul'; - return '<' + type + '>\n' + body + '\n'; +Renderer.prototype.list = function(body, ordered, start) { + var type = ordered ? 'ol' : 'ul', + startatt = (ordered && start !== 1) ? (' start="' + start + '"') : ''; + return '<' + type + startatt + '>\n' + body + '\n'; }; Renderer.prototype.listitem = function(text) { @@ -1282,34 +1306,27 @@ Parser.prototype.tok = function() { } case 'list_start': { var body = '' - , ordered = this.token.ordered; + , ordered = this.token.ordered + , start = this.token.start; while (this.next().type !== 'list_end') { body += this.tok(); } - return this.renderer.list(body, ordered); + return this.renderer.list(body, ordered, start); } case 'list_item_start': { - var body = ''; + var body = '' + , loose = this.token.loose; while (this.next().type !== 'list_item_end') { - body += this.token.type === 'text' + body += !loose && this.token.type === 'text' ? this.parseText() : this.tok(); } return this.renderer.listitem(body); } - case 'loose_item_start': { - var body = ''; - - while (this.next().type !== 'list_item_end') { - body += this.tok(); - } - - return this.renderer.listitem(body); - } case 'html': { var html = !this.token.pre && !this.options.pedantic ? this.inline.output(this.token.text) diff --git a/zerver/lib/bugdown/__init__.py b/zerver/lib/bugdown/__init__.py index d95a1b62e2..f54b4f30b2 100644 --- a/zerver/lib/bugdown/__init__.py +++ b/zerver/lib/bugdown/__init__.py @@ -27,7 +27,7 @@ import requests from django.conf import settings from django.db.models import Q -from markdown.extensions import codehilite, nl2br, tables +from markdown.extensions import codehilite, nl2br, tables, sane_lists from zerver.lib.bugdown import fenced_code from zerver.lib.bugdown.fenced_code import FENCE_RE from zerver.lib.camo import get_camo_url @@ -1421,24 +1421,24 @@ class AutoLink(CompiledPattern): db_data = self.markdown.zulip_db_data return url_to_a(db_data, url) -class UListProcessor(markdown.blockprocessors.UListProcessor): - """ Process unordered list blocks. - - Based on markdown.blockprocessors.UListProcessor, but does not accept - '+' or '-' as a bullet character.""" - - TAG = 'ul' - RE = re.compile('^[ ]{0,3}[*][ ]+(.*)') - +class OListProcessor(sane_lists.SaneOListProcessor): def __init__(self, parser: Any) -> None: - - # HACK: Set the tab length to 2 just for the initialization of - # this class, so that bulleted lists (and only bulleted lists) - # work off 2-space indentation. parser.markdown.tab_length = 2 super().__init__(parser) parser.markdown.tab_length = 4 +class UListProcessor(sane_lists.SaneUListProcessor): + """ Does not accept '+' or '-' as a bullet character. """ + + def __init__(self, parser: Any) -> None: + parser.markdown.tab_length = 2 + super().__init__(parser) + parser.markdown.tab_length = 4 + + self.RE = re.compile('^[ ]{0,%d}[*][ ]+(.*)' % (self.tab_length - 1,)) + self.CHILD_RE = re.compile(r'^[ ]{0,%d}(([*]))[ ]+(.*)' % + (self.tab_length - 1,)) + class ListIndentProcessor(markdown.blockprocessors.ListIndentProcessor): """ Process unordered list blocks. @@ -1482,16 +1482,15 @@ class BlockQuoteProcessor(markdown.blockprocessors.BlockQuoteProcessor): # And then run the upstream processor's code for removing the '>' return super().clean(line) -class BugdownUListPreprocessor(markdown.preprocessors.Preprocessor): - """ Allows unordered list blocks that come directly after a - paragraph to be rendered as an unordered list +class BugdownListPreprocessor(markdown.preprocessors.Preprocessor): + """ Allows list blocks that come directly after another block + to be rendered as a list. Detects paragraphs that have a matching list item that comes directly after a line of text, and inserts a newline between to satisfy Markdown""" - LI_RE = re.compile('^[ ]{0,3}[*][ ]+(.*)', re.MULTILINE) - HANGING_ULIST_RE = re.compile('^.+\\n([ ]{0,3}[*][ ]+.*)', re.MULTILINE) + LI_RE = re.compile(r'^[ ]{0,3}(\*|\d\.)[ ]+(.*)', re.MULTILINE) def run(self, lines: List[str]) -> List[str]: """ Insert a newline between a paragraph and ulist if missing """ @@ -1509,91 +1508,17 @@ class BugdownUListPreprocessor(markdown.preprocessors.Preprocessor): fence = None # If we're not in a fenced block and we detect an upcoming list - # hanging off a paragraph, add a newline - if (not fence and lines[i] and - self.LI_RE.match(lines[i+1]) and - not self.LI_RE.match(lines[i])): - - copy.insert(i+inserts+1, '') - inserts += 1 + # hanging off any block (including a list of another type), add + # a newline. + li1 = self.LI_RE.match(lines[i]) + li2 = self.LI_RE.match(lines[i+1]) + if not fence and lines[i]: + if (li2 and not li1) or (li1 and li2 and + (len(li1.group(1)) == 1) != (len(li2.group(1)) == 1)): + copy.insert(i+inserts+1, '') + inserts += 1 return copy -class AutoNumberOListPreprocessor(markdown.preprocessors.Preprocessor): - """ Finds a sequence of lines numbered by the same number""" - RE = re.compile(r'^([ ]*)(\d+)\.[ ]+(.*)') - TAB_LENGTH = 2 - - def run(self, lines: List[str]) -> List[str]: - new_lines = [] # type: List[str] - current_list = [] # type: List[Match[str]] - current_indent = 0 - - for line in lines: - m = self.RE.match(line) - - # Remember if this line is a continuation of already started list - is_next_item = (m and current_list - and current_indent == len(m.group(1)) // self.TAB_LENGTH) - - is_blank_line = line.strip() == "" - - if not is_next_item and not is_blank_line: - # This is a non-blank line that doesn't start with a - # bullet, so we're done with the previous numbered - # list and can start a new one. - new_lines.extend(self.renumber(current_list)) - current_list = [] - - if not m: - # This line doesn't start with a bullet. If it's not - # between bullets of a list (i.e. `current_list = - # []`), this is just normal content outside a bulleted - # list and we can append it to `new_lines`. - if not current_list: - # Ordinary line - new_lines.append(line) - - # Otherwise, it's a blank line in between bullets, - # because if this was a bullet, `m` would be truthy, - # and if it wasn't blank, we could have terminated the - # list (see above). We can just skip this blank line - # syntax, as our bulleted list CSS styling will - # control vertical spacing between bullets. - elif is_next_item: - # Another list item - current_list.append(m) - else: - # First list item - current_list = [m] - current_indent = len(m.group(1)) // self.TAB_LENGTH - - new_lines.extend(self.renumber(current_list)) - - return new_lines - - def renumber(self, mlist: List[Match[str]]) -> List[str]: - if not mlist: - return [] - - start_number = int(mlist[0].group(2)) - - # Change numbers only if every one is the same - change_numbers = True - for m in mlist: - if int(m.group(2)) != start_number: - change_numbers = False - break - - lines = [] # type: List[str] - counter = start_number - - for m in mlist: - number = str(counter) if change_numbers else m.group(2) - lines.append('%s%s. %s' % (m.group(1), number, m.group(3))) - counter += 1 - - return lines - # Name for the outer capture group we use to separate whitespace and # other delimiters from the actual content. This value won't be an # option in user-entered capture groups. @@ -1874,8 +1799,7 @@ class Bugdown(markdown.Markdown): # html_block - insecure # reference - references don't make sense in a chat context. preprocessors = markdown.util.Registry() - preprocessors.register(AutoNumberOListPreprocessor(self), 'auto_number_olist', 40) - preprocessors.register(BugdownUListPreprocessor(self), 'hanging_ulists', 35) + preprocessors.register(BugdownListPreprocessor(self), 'hanging_lists', 35) preprocessors.register(markdown.preprocessors.NormalizeWhitespace(self), 'normalize_whitespace', 30) preprocessors.register(fenced_code.FencedBlockPreprocessor(self), 'fenced_code_block', 25) preprocessors.register(AlertWordsNotificationProcessor(self), 'custom_text_notifications', 20) @@ -1897,6 +1821,7 @@ class Bugdown(markdown.Markdown): parser.blockprocessors.register(HashHeaderProcessor(parser), 'hashheader', 78) # We get priority 75 from 'table' extension parser.blockprocessors.register(markdown.blockprocessors.HRProcessor(parser), 'hr', 70) + parser.blockprocessors.register(OListProcessor(parser), 'olist', 68) parser.blockprocessors.register(UListProcessor(parser), 'ulist', 65) parser.blockprocessors.register(ListIndentProcessor(parser), 'indent', 60) parser.blockprocessors.register(BlockQuoteProcessor(parser), 'quote', 55) diff --git a/zerver/lib/push_notifications.py b/zerver/lib/push_notifications.py index 2fd53c25d7..1f151164f7 100644 --- a/zerver/lib/push_notifications.py +++ b/zerver/lib/push_notifications.py @@ -489,15 +489,31 @@ def get_mobile_push_content(rendered_content: str) -> str: quote_text += '\n' return quote_text + def render_olist(ol: lxml.html.HtmlElement) -> str: + items = [] + counter = int(ol.get('start')) if ol.get('start') else 1 + nested_levels = len(list(ol.iterancestors('ol'))) + indent = ('\n' + ' ' * nested_levels) if nested_levels else '' + + for li in ol: + items.append(indent + str(counter) + '. ' + process(li).strip()) + counter += 1 + + return '\n'.join(items) + def process(elem: lxml.html.HtmlElement) -> str: - plain_text = get_text(elem) - sub_text = '' - for child in elem: - sub_text += process(child) - if elem.tag == 'blockquote': - sub_text = format_as_quote(sub_text) - plain_text += sub_text - plain_text += elem.tail or "" + plain_text = '' + if elem.tag == 'ol': + plain_text = render_olist(elem) + else: + plain_text = get_text(elem) + sub_text = '' + for child in elem: + sub_text += process(child) + if elem.tag == 'blockquote': + sub_text = format_as_quote(sub_text) + plain_text += sub_text + plain_text += elem.tail or "" return plain_text if settings.PUSH_NOTIFICATION_REDACT_CONTENT: diff --git a/zerver/tests/fixtures/markdown_test_cases.json b/zerver/tests/fixtures/markdown_test_cases.json index cc53eda7f2..9e3164aa13 100644 --- a/zerver/tests/fixtures/markdown_test_cases.json +++ b/zerver/tests/fixtures/markdown_test_cases.json @@ -292,27 +292,27 @@ }, { "name": "numbered_list", - "input": "1. A\n 2. B", - "expected_output": "

1. A
\n 2. B

", - "text_content": "1. A\n 2. B" + "input": "1. A\n2. B\n 3. C", + "expected_output": "
    \n
  1. A
  2. \n
  3. B
      \n
    1. C
    2. \n
    \n
  4. \n
", + "text_content": "1. A\n2. B\n 3. C" }, { - "name": "auto_renumbered_list", - "input": "1. A\n1. B\n 1. C\n1. D", - "expected_output": "

1. A
\n2. B
\n 3. C
\n4. D

", - "text_content": "1. A\n2. B\n 3. C\n4. D" + "name": "auto_renumbered_list_trick", + "input": "1. A\n1. B\n 1. C\n1. D", + "expected_output": "
    \n
  1. A
  2. \n
  3. B
      \n
    1. C
    2. \n
    \n
  4. \n
  5. D
  6. \n
", + "text_content": "1. A\n2. B\n 1. C\n3. D" }, { "name": "auto_renumbered_list_from", "input": "3. A\n3. B\n3. C\n3. D", - "expected_output": "

3. A
\n4. B
\n5. C
\n6. D

", + "expected_output": "
    \n
  1. A
  2. \n
  3. B
  4. \n
  5. C
  6. \n
  7. D
  8. \n
", "text_content": "3. A\n4. B\n5. C\n6. D" }, { - "name": "not_auto_renumbered_list", - "input": "1. A\n3. B\n 2. C\n1. D", - "expected_output": "

1. A
\n3. B
\n 2. C
\n1. D

", - "text_content": "1. A\n3. B\n 2. C\n1. D" + "name": "two_auto_renumbered_lists", + "input": "1. A\n3. B\n 2. C\n1. D", + "expected_output": "
    \n
  1. A
  2. \n
  3. B
      \n
    1. C
    2. \n
    \n
  4. \n
  5. D
  6. \n
", + "text_content": "1. A\n2. B\n 2. C\n3. D" }, { "name": "linkify_interference", @@ -775,8 +775,9 @@ { "name": "auto_renumbered_list_blankline", "input": "1. A\n\n1. B\n\n1. C\n\n1. D\nordinary paragraph\n1. AA\n\n1. BB", - "expected_output": "

1. A
\n2. B
\n3. C
\n4. D
\nordinary paragraph
\n1. AA
\n2. BB

", - "text_content": "1. A\n2. B\n3. C\n4. D\nordinary paragraph\n1. AA\n2. BB" + "expected_output": "
    \n
  1. \n

    A

    \n
  2. \n
  3. \n

    B

    \n
  4. \n
  5. \n

    C

    \n
  6. \n
  7. \n

    D
    \nordinary paragraph

    \n
  8. \n
  9. \n

    AA

    \n
  10. \n
  11. \n

    BB

    \n
  12. \n
", + "marked_expected_output": "
    \n
  1. A

    \n
  2. \n
  3. B

    \n
  4. \n
  5. C

    \n
  6. \n
  7. D
    \nordinary paragraph

    \n
  8. \n
  9. AA

    \n
  10. \n
  11. BB

    \n
  12. \n
", + "text_content": "1. A\n2. B\n3. C\n4. D\nordinary paragraph\n5. AA\n6. BB" } ], "linkify_tests": [ diff --git a/zerver/tests/test_bugdown.py b/zerver/tests/test_bugdown.py index 4fad31bd3b..7acfc0ba3d 100644 --- a/zerver/tests/test_bugdown.py +++ b/zerver/tests/test_bugdown.py @@ -463,7 +463,7 @@ class BugdownTest(ZulipTestCase): converted = render_markdown(msg, content) self.assertEqual(converted, expected) - content = '>- http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg\n\nAwesome!' + content = '>* http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg\n\nAwesome!' expected = '
\n\n
\n

Awesome!

' sender_user_profile = self.example_user('othello') msg = Message(sender=sender_user_profile, sending_client=get_client("test"))