From b7c5ae7bca3990421971e93fbe0ce85f1699f347 Mon Sep 17 00:00:00 2001 From: Rohitt Vashishtha Date: Thu, 20 Dec 2018 07:28:40 +0000 Subject: [PATCH] dependencies: Upgrade markdown from 2.6.11 -> 3.0.1. This is a major upgrade, and requires some significant compatibility work: * Migrating the pattern-removal logic to use the Registry feature. * Handling the removal of positional arguments in markdown extensions. * Handling the removal of safe mode. --- requirements/common.in | 2 +- requirements/dev.txt | 2 +- requirements/prod.txt | 2 +- version.py | 2 +- zerver/lib/bugdown/__init__.py | 59 ++++++++++++++++++------ zerver/lib/bugdown/api_code_examples.py | 2 +- zerver/lib/bugdown/fenced_code.py | 2 +- zerver/lib/bugdown/nested_code_blocks.py | 2 +- zerver/lib/bugdown/tabbed_sections.py | 2 +- 9 files changed, 52 insertions(+), 23 deletions(-) diff --git a/requirements/common.in b/requirements/common.in index 54ef4a3498..83b360e66d 100644 --- a/requirements/common.in +++ b/requirements/common.in @@ -12,7 +12,7 @@ mypy_extensions==0.4.1 Jinja2==2.10 # Needed for markdown processing -Markdown==2.6.11 +Markdown==3.0.1 MarkupSafe==1.1.0 Pygments==2.3.1 diff --git a/requirements/dev.txt b/requirements/dev.txt index 5a365ca127..952e9f8d35 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -92,7 +92,7 @@ jsondiff==1.1.1 # via moto jsonpickle==1.0 # via aws-xray-sdk, python-digitalocean lxml==4.3.0 markdown-include==0.5.1 -markdown==2.6.11 +markdown==3.0.1 markupsafe==1.1.0 matrix-client==0.3.2 mock==2.0.0 diff --git a/requirements/prod.txt b/requirements/prod.txt index 81e139ee48..dc05232000 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -66,7 +66,7 @@ jedi==0.13.2 # via ipython jinja2==2.10 lxml==4.3.0 markdown-include==0.5.1 -markdown==2.6.11 +markdown==3.0.1 markupsafe==1.1.0 matrix-client==0.3.2 mock==2.0.0 diff --git a/version.py b/version.py index 878b899207..89617b9115 100644 --- a/version.py +++ b/version.py @@ -11,4 +11,4 @@ 
LATEST_RELEASE_ANNOUNCEMENT = "https://blog.zulip.org/2018/11/07/zulip-1-9-relea # Typically, adding a dependency only requires a minor version bump, and # removing a dependency requires a major version bump. -PROVISION_VERSION = '26.22' +PROVISION_VERSION = '27.0' diff --git a/zerver/lib/bugdown/__init__.py b/zerver/lib/bugdown/__init__.py index 1a9df32f31..5c90f8cc0a 100644 --- a/zerver/lib/bugdown/__init__.py +++ b/zerver/lib/bugdown/__init__.py @@ -1436,6 +1436,25 @@ class LinkPattern(markdown.inlinepatterns.Pattern): fixup_link(el, target_blank=(href[:1] != '#')) return el + +# We need the following since upgrade from py-markdown 2.6.11 to 3.0.1 +# modifies the link handling significantly. The following is taken from +# py-markdown 2.6.11 markdown/inlinepatterns.py. +def get_link_re() -> str: + NOBRACKET = r'[^\]\[]*' + BRK = ( + r'\[(' + + (NOBRACKET + r'(\[')*6 + + (NOBRACKET + r'\])*')*6 + + NOBRACKET + r')\]' + ) + NOIMG = r'(?<!\!)' + # [text](url) or [text](<url>) or [text](url "title") + LINK_RE = NOIMG + BRK + \ + r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)''' + return LINK_RE + def prepare_realm_pattern(source: str) -> str: """ Augment a realm filter so it only matches after start-of-string, whitespace, or opening delimiters, won't match if there are word @@ -1595,6 +1614,15 @@ class AtomicLinkPattern(LinkPattern): ret.text = markdown.util.AtomicString(ret.text) return ret +def get_sub_registry(r: markdown.util.Registry, keys: List[str]) -> markdown.util.Registry: + # Registry is a new class added by py-markdown to replace Ordered List. + # Since Registry doesn't support .keys(), it is easier to make a new + # object instead of removing keys from the existing object. 
+ new_r = markdown.util.Registry() + for k in keys: + new_r.register(r[k], k, r.get_index_for_name(k)) + return new_r + # These are used as keys ("realm_filters_keys") to md_engines and the respective # realm filter caches DEFAULT_BUGDOWN_KEY = -1 @@ -1622,8 +1650,10 @@ class Bugdown(markdown.Extension): for k in ('image_link', 'image_reference', 'automail', 'autolink', 'link', 'reference', 'short_reference', 'escape', 'strong_em', 'emphasis', 'emphasis2', - 'linebreak', 'strong', 'backtick'): - del md.inlinePatterns[k] + 'linebreak', 'strong', 'backtick', 'em_strong', + 'strong2'): + md.inlinePatterns.deregister(k) + try: # linebreak2 was removed upstream in version 3.2.1, so # don't throw an error if it is not there @@ -1719,7 +1749,7 @@ class Bugdown(markdown.Extension): md.inlinePatterns.add('unicodeemoji', UnicodeEmoji(unicode_emoji_regex), '_end') def extend_misc(self, md: markdown.Markdown) -> None: - md.inlinePatterns.add('link', AtomicLinkPattern(markdown.inlinepatterns.LINK_RE, md), '>avatar') + md.inlinePatterns.add('link', AtomicLinkPattern(get_link_re(), md), '>avatar') for (pattern, format_string, id) in self.getConfig("realm_filters"): md.inlinePatterns.add('realm_filters/%s' % (pattern,), @@ -1745,18 +1775,17 @@ class Bugdown(markdown.Extension): # users' traffic that is mirrored. 
Note that # inline_interesting_links is a treeprocessor and thus is # not removed - for k in list(md.inlinePatterns.keys()): - if k not in ["autolink"]: - del md.inlinePatterns[k] - for k in list(md.treeprocessors.keys()): - if k not in ["inline_interesting_links", "inline", "rewrite_to_https"]: - del md.treeprocessors[k] - for k in list(md.preprocessors.keys()): - if k not in ["custom_text_notifications"]: - del md.preprocessors[k] - for k in list(md.parser.blockprocessors.keys()): - if k not in ["paragraph"]: - del md.parser.blockprocessors[k] + md.inlinePatterns = get_sub_registry(md.inlinePatterns, ['autolink']) + md.treeprocessors = get_sub_registry(md.treeprocessors, + ['inline_interesting_links', + 'rewrite_to_https']) + # insert new 'inline' processor because we have changed md.inlinePatterns + # but InlineProcessor copies md as self.md in __init__. + md.treeprocessors.add('inline', + markdown.treeprocessors.InlineProcessor(md), + '>inline_interesting_links') + md.preprocessors = get_sub_registry(md.preprocessors, ['custom_text_notifications']) + md.parser.blockprocessors = get_sub_registry(md.parser.blockprocessors, ['paragraph']) md_engines = {} # type: Dict[Tuple[int, bool], markdown.Markdown] realm_filter_data = {} # type: Dict[int, List[Tuple[str, str, int]]] diff --git a/zerver/lib/bugdown/api_code_examples.py b/zerver/lib/bugdown/api_code_examples.py index 3603ab5e7c..abb712da2b 100644 --- a/zerver/lib/bugdown/api_code_examples.py +++ b/zerver/lib/bugdown/api_code_examples.py @@ -158,4 +158,4 @@ class APICodeExamplesPreprocessor(Preprocessor): return fixture def makeExtension(*args: Any, **kwargs: str) -> APICodeExamplesGenerator: - return APICodeExamplesGenerator(kwargs) + return APICodeExamplesGenerator(**kwargs) diff --git a/zerver/lib/bugdown/fenced_code.py b/zerver/lib/bugdown/fenced_code.py index 834db151fe..6a3bd51175 100644 --- a/zerver/lib/bugdown/fenced_code.py +++ b/zerver/lib/bugdown/fenced_code.py @@ -322,7 +322,7 @@ class 
FencedBlockPreprocessor(markdown.preprocessors.Preprocessor): return "\n\n".join(tex_paragraphs) def placeholder(self, code: str) -> str: - return self.markdown.htmlStash.store(code, safe=True) + return self.markdown.htmlStash.store(code) def _escape(self, txt: str) -> str: """ basic html escaping """ diff --git a/zerver/lib/bugdown/nested_code_blocks.py b/zerver/lib/bugdown/nested_code_blocks.py index 32213e1709..d7856ed05a 100644 --- a/zerver/lib/bugdown/nested_code_blocks.py +++ b/zerver/lib/bugdown/nested_code_blocks.py @@ -72,4 +72,4 @@ class NestedCodeBlocksRendererTreeProcessor(markdown.treeprocessors.Treeprocesso parent.remove(element_to_replace) def makeExtension(*args: Any, **kwargs: str) -> NestedCodeBlocksRenderer: - return NestedCodeBlocksRenderer(kwargs) + return NestedCodeBlocksRenderer(**kwargs) diff --git a/zerver/lib/bugdown/tabbed_sections.py b/zerver/lib/bugdown/tabbed_sections.py index 4e264b9f23..04887d693b 100644 --- a/zerver/lib/bugdown/tabbed_sections.py +++ b/zerver/lib/bugdown/tabbed_sections.py @@ -127,4 +127,4 @@ class TabbedSectionsPreprocessor(Preprocessor): return block def makeExtension(*args: Any, **kwargs: str) -> TabbedSectionsGenerator: - return TabbedSectionsGenerator(kwargs) + return TabbedSectionsGenerator(**kwargs)