From b7c5ae7bca3990421971e93fbe0ce85f1699f347 Mon Sep 17 00:00:00 2001
From: Rohitt Vashishtha <aero31aero@gmail.com>
Date: Thu, 20 Dec 2018 07:28:40 +0000
Subject: [PATCH] dependencies: Upgrade markdown from 2.6.11 -> 3.0.1.

This is a major upgrade, and requires some significant compatibility
work:
* Migrating the pattern-removal logic to use the Registry feature.
* Handling the removal of positional arguments in markdown extensions.
* Handling the removal of safe mode.
---
 requirements/common.in                   |  2 +-
 requirements/dev.txt                     |  2 +-
 requirements/prod.txt                    |  2 +-
 version.py                               |  2 +-
 zerver/lib/bugdown/__init__.py           | 59 ++++++++++++++++++------
 zerver/lib/bugdown/api_code_examples.py  |  2 +-
 zerver/lib/bugdown/fenced_code.py        |  2 +-
 zerver/lib/bugdown/nested_code_blocks.py |  2 +-
 zerver/lib/bugdown/tabbed_sections.py    |  2 +-
 9 files changed, 52 insertions(+), 23 deletions(-)
diff --git a/requirements/common.in b/requirements/common.in
index 54ef4a3498..83b360e66d 100644
--- a/requirements/common.in
+++ b/requirements/common.in
@@ -12,7 +12,7 @@ mypy_extensions==0.4.1
 Jinja2==2.10
 
 # Needed for markdown processing
-Markdown==2.6.11
+Markdown==3.0.1
 MarkupSafe==1.1.0
 Pygments==2.3.1
 
diff --git a/requirements/dev.txt b/requirements/dev.txt
index 5a365ca127..952e9f8d35 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -92,7 +92,7 @@ jsondiff==1.1.1           # via moto
 jsonpickle==1.0           # via aws-xray-sdk, python-digitalocean
 lxml==4.3.0
 markdown-include==0.5.1
-markdown==2.6.11
+markdown==3.0.1
 markupsafe==1.1.0
 matrix-client==0.3.2
 mock==2.0.0
diff --git a/requirements/prod.txt b/requirements/prod.txt
index 81e139ee48..dc05232000 100644
--- a/requirements/prod.txt
+++ b/requirements/prod.txt
@@ -66,7 +66,7 @@ jedi==0.13.2              # via ipython
 jinja2==2.10
 lxml==4.3.0
 markdown-include==0.5.1
-markdown==2.6.11
+markdown==3.0.1
 markupsafe==1.1.0
 matrix-client==0.3.2
 mock==2.0.0
diff --git a/version.py b/version.py
index 878b899207..89617b9115 100644
--- a/version.py
+++ b/version.py
@@ -11,4 +11,4 @@ LATEST_RELEASE_ANNOUNCEMENT = "https://blog.zulip.org/2018/11/07/zulip-1-9-relea
 # Typically, adding a dependency only requires a minor version bump, and
 # removing a dependency requires a major version bump.
 
-PROVISION_VERSION = '26.22'
+PROVISION_VERSION = '27.0'
diff --git a/zerver/lib/bugdown/__init__.py b/zerver/lib/bugdown/__init__.py
index 1a9df32f31..5c90f8cc0a 100644
--- a/zerver/lib/bugdown/__init__.py
+++ b/zerver/lib/bugdown/__init__.py
@@ -1436,6 +1436,25 @@ class LinkPattern(markdown.inlinepatterns.Pattern):
         fixup_link(el, target_blank=(href[:1] != '#'))
         return el
 
+
+# The upgrade from py-markdown 2.6.11 to 3.0.1 modifies the link handling
+# significantly, so we keep our own copy of the old link regex.  The
+# following is taken from py-markdown 2.6.11's markdown/inlinepatterns.py.
+def get_link_re() -> str:
+    NOBRACKET = r'[^\]\[]*'
+    BRK = (
+        r'\[(' +
+        (NOBRACKET + r'(\[')*6 +
+        (NOBRACKET + r'\])*')*6 +
+        NOBRACKET + r')\]'
+    )
+    NOIMG = r'(?<!\!)'
+
+    # [text](url) or [text](<url>) or [text](url "title")
+    LINK_RE = NOIMG + BRK + \
+        r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''
+    return LINK_RE
+
 def prepare_realm_pattern(source: str) -> str:
     """ Augment a realm filter so it only matches after start-of-string,
     whitespace, or opening delimiters, won't match if there are word
@@ -1595,6 +1614,15 @@ class AtomicLinkPattern(LinkPattern):
             ret.text = markdown.util.AtomicString(ret.text)
         return ret
 
+def get_sub_registry(r: markdown.util.Registry, keys: List[str]) -> markdown.util.Registry:
+    # Registry replaced OrderedDict in py-markdown 3.0 and has no .keys(), so
+    # build a new Registry holding only `keys` rather than deleting the rest.
+    # Registry sorts by descending priority; negate the index to keep order.
+    new_r = markdown.util.Registry()
+    for k in keys:
+        new_r.register(r[k], k, -r.get_index_for_name(k))
+    return new_r
+
 # These are used as keys ("realm_filters_keys") to md_engines and the respective
 # realm filter caches
 DEFAULT_BUGDOWN_KEY = -1
@@ -1622,8 +1650,10 @@ class Bugdown(markdown.Extension):
         for k in ('image_link', 'image_reference', 'automail',
                   'autolink', 'link', 'reference', 'short_reference',
                   'escape', 'strong_em', 'emphasis', 'emphasis2',
-                  'linebreak', 'strong', 'backtick'):
-            del md.inlinePatterns[k]
+                  'linebreak', 'strong', 'backtick', 'em_strong',
+                  'strong2'):
+            md.inlinePatterns.deregister(k)
+
         try:
             # linebreak2 was removed upstream in version 3.2.1, so
             # don't throw an error if it is not there
@@ -1719,7 +1749,7 @@ class Bugdown(markdown.Extension):
         md.inlinePatterns.add('unicodeemoji', UnicodeEmoji(unicode_emoji_regex), '_end')
 
     def extend_misc(self, md: markdown.Markdown) -> None:
-        md.inlinePatterns.add('link', AtomicLinkPattern(markdown.inlinepatterns.LINK_RE, md), '>avatar')
+        md.inlinePatterns.add('link', AtomicLinkPattern(get_link_re(), md), '>avatar')
 
         for (pattern, format_string, id) in self.getConfig("realm_filters"):
             md.inlinePatterns.add('realm_filters/%s' % (pattern,),
@@ -1745,18 +1775,17 @@ class Bugdown(markdown.Extension):
             # users' traffic that is mirrored.  Note that
             # inline_interesting_links is a treeprocessor and thus is
             # not removed
-            for k in list(md.inlinePatterns.keys()):
-                if k not in ["autolink"]:
-                    del md.inlinePatterns[k]
-            for k in list(md.treeprocessors.keys()):
-                if k not in ["inline_interesting_links", "inline", "rewrite_to_https"]:
-                    del md.treeprocessors[k]
-            for k in list(md.preprocessors.keys()):
-                if k not in ["custom_text_notifications"]:
-                    del md.preprocessors[k]
-            for k in list(md.parser.blockprocessors.keys()):
-                if k not in ["paragraph"]:
-                    del md.parser.blockprocessors[k]
+            md.inlinePatterns = get_sub_registry(md.inlinePatterns, ['autolink'])
+            md.treeprocessors = get_sub_registry(md.treeprocessors,
+                                                 ['inline_interesting_links',
+                                                  'rewrite_to_https'])
+            # Re-create the 'inline' processor: md.inlinePatterns was replaced above,
+            # and InlineProcessor binds to md's patterns when constructed in __init__.
+            md.treeprocessors.add('inline',
+                                  markdown.treeprocessors.InlineProcessor(md),
+                                  '>inline_interesting_links')
+            md.preprocessors = get_sub_registry(md.preprocessors, ['custom_text_notifications'])
+            md.parser.blockprocessors = get_sub_registry(md.parser.blockprocessors, ['paragraph'])
 
 md_engines = {}  # type: Dict[Tuple[int, bool], markdown.Markdown]
 realm_filter_data = {}  # type: Dict[int, List[Tuple[str, str, int]]]
diff --git a/zerver/lib/bugdown/api_code_examples.py b/zerver/lib/bugdown/api_code_examples.py
index 3603ab5e7c..abb712da2b 100644
--- a/zerver/lib/bugdown/api_code_examples.py
+++ b/zerver/lib/bugdown/api_code_examples.py
@@ -158,4 +158,4 @@ class APICodeExamplesPreprocessor(Preprocessor):
         return fixture
 
 def makeExtension(*args: Any, **kwargs: str) -> APICodeExamplesGenerator:
-    return APICodeExamplesGenerator(kwargs)
+    return APICodeExamplesGenerator(**kwargs)
diff --git a/zerver/lib/bugdown/fenced_code.py b/zerver/lib/bugdown/fenced_code.py
index 834db151fe..6a3bd51175 100644
--- a/zerver/lib/bugdown/fenced_code.py
+++ b/zerver/lib/bugdown/fenced_code.py
@@ -322,7 +322,7 @@ class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
         return "\n\n".join(tex_paragraphs)
 
     def placeholder(self, code: str) -> str:
-        return self.markdown.htmlStash.store(code, safe=True)
+        return self.markdown.htmlStash.store(code)
 
     def _escape(self, txt: str) -> str:
         """ basic html escaping """
diff --git a/zerver/lib/bugdown/nested_code_blocks.py b/zerver/lib/bugdown/nested_code_blocks.py
index 32213e1709..d7856ed05a 100644
--- a/zerver/lib/bugdown/nested_code_blocks.py
+++ b/zerver/lib/bugdown/nested_code_blocks.py
@@ -72,4 +72,4 @@ class NestedCodeBlocksRendererTreeProcessor(markdown.treeprocessors.Treeprocesso
                 parent.remove(element_to_replace)
 
 def makeExtension(*args: Any, **kwargs: str) -> NestedCodeBlocksRenderer:
-    return NestedCodeBlocksRenderer(kwargs)
+    return NestedCodeBlocksRenderer(**kwargs)
diff --git a/zerver/lib/bugdown/tabbed_sections.py b/zerver/lib/bugdown/tabbed_sections.py
index 4e264b9f23..04887d693b 100644
--- a/zerver/lib/bugdown/tabbed_sections.py
+++ b/zerver/lib/bugdown/tabbed_sections.py
@@ -127,4 +127,4 @@ class TabbedSectionsPreprocessor(Preprocessor):
         return block
 
 def makeExtension(*args: Any, **kwargs: str) -> TabbedSectionsGenerator:
-    return TabbedSectionsGenerator(kwargs)
+    return TabbedSectionsGenerator(**kwargs)