markdown: Allow whitespace overlaps in topic linkifiers.

`prepare_linkifier_pattern`, as of db934be064, adds a match to the end of the regex, of either the end of string, or a non-word character -- this is in place of a negative look-ahead, which is no longer possible in re2. This causes the regex to consume trailing whitespace, and thus not be able to match twice in succession with `pattern.finditer` -- "#1234 #5678" fails to match because the space is consumed by the first match of the regex. Rather than use `pattern.finditer`, write our own version, which rewinds over the non-word character consumed after the match, if any. This allows the same "after" non-word character to also satisfy the "before" of the next match. Fixes #21502.
2022-03-21 20:11:23 -04:00 · 2022-03-21 20:11:23 -04:00 · 1ac0035f8c
parent d89b5042a9
commit 1ac0035f8c
2 changed files with 27 additions and 4 deletions
--- a/zerver/lib/markdown/init.py
+++ b/zerver/lib/markdown/init.py
@@ -2365,9 +2365,21 @@ def topic_links(linkifiers_key: int, topic_name: str) -> List[Dict[str, str]]:
            # here on an invalid regex would spam the logs with every
            # message sent; simply move on.
            continue
-        for m in pattern.finditer(topic_name):
+        pos = 0
+        while pos < len(topic_name):
+            m = pattern.search(topic_name, pos)
+            if m is None:
+                break
+
            match_details = m.groupdict()
            match_text = match_details[OUTER_CAPTURE_GROUP]
+
+            # Adjust the start point of the match for the next
+            # iteration -- we rewind the non-word character at the
+            # end, if there was one, so a potential next match can
+            # also use it.
+            pos = m.end() - len(match_details[AFTER_CAPTURE_GROUP])
+
            # We format the linkifier's url string using the matched text.
            # Also, we include the matched text in the response, so that our clients
            # don't have to implement any logic of their own to get back the text.
--- a/zerver/tests/test_markdown.py
+++ b/zerver/tests/test_markdown.py
@@ -1322,13 +1322,13 @@ class MarkdownTest(ZulipTestCase):

        flush_per_request_caches()

-        content = "We should fix #224 and #115, but not issue#124 or #1124z or [trac #15](https://trac.example.com/ticket/16) today."
+        content = "We should fix #224 #336 #446 and #115, but not issue#124 or #1124z or [trac #15](https://trac.example.com/ticket/16) today."
        converted = markdown_convert(content, message_realm=realm, message=msg)
        converted_topic = topic_links(realm.id, msg.topic_name())

        self.assertEqual(
            converted.rendered_content,
-            '<p>We should fix <a href="https://trac.example.com/ticket/224">#224</a> and <a href="https://trac.example.com/ticket/115">#115</a>, but not issue#124 or #1124z or <a href="https://trac.example.com/ticket/16">trac #15</a> today.</p>',
+            '<p>We should fix <a href="https://trac.example.com/ticket/224">#224</a> <a href="https://trac.example.com/ticket/336">#336</a> <a href="https://trac.example.com/ticket/446">#446</a> and <a href="https://trac.example.com/ticket/115">#115</a>, but not issue#124 or #1124z or <a href="https://trac.example.com/ticket/16">trac #15</a> today.</p>',
        )
        self.assertEqual(
            converted_topic, [{"url": "https://trac.example.com/ticket/444", "text": "#444"}]
@@ -1344,6 +1344,17 @@ class MarkdownTest(ZulipTestCase):
            ],
        )

+        msg.set_topic_name("#444 #555 #666")
+        converted_topic = topic_links(realm.id, msg.topic_name())
+        self.assertEqual(
+            converted_topic,
+            [
+                {"url": "https://trac.example.com/ticket/444", "text": "#444"},
+                {"url": "https://trac.example.com/ticket/555", "text": "#555"},
+                {"url": "https://trac.example.com/ticket/666", "text": "#666"},
+            ],
+        )
+
        RealmFilter(
            realm=realm,
            pattern=r"#(?P<id>[a-zA-Z]+-[0-9]+)",
@@ -1474,7 +1485,7 @@ class MarkdownTest(ZulipTestCase):
            converted.rendered_content,
            '<p>We should fix <a href="https://trac.example.com/ticket/ABC-123">ABC-123</a> or <a href="https://trac.example.com/ticket/16">trac ABC-123</a> today.</p>',
        )
-        # Both the links should be generated in topics.
+        # But both the links should be generated in topics.
        self.assertEqual(
            converted_topic,
            [