From 8c8dbb3d66a6f78de9a562e66fd6a344203043a4 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Tue, 28 Nov 2023 22:19:19 +0000 Subject: [PATCH] markdown: Stop attempting to expand/collapse re2 regex. 549dd8a4c4a4 changed the regex that we build to contain whitespace for readability, and strip that back out before returning it. Unfortunately, this also serves to strip out whitespace in the source linkifier, causing it to not match expected strings. Revert 549dd8a4c4a4. Fixes: #27854. --- zerver/lib/markdown/__init__.py | 20 +------------------- zerver/tests/test_markdown.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/zerver/lib/markdown/__init__.py b/zerver/lib/markdown/__init__.py index d352d79bfc..53bbb32b16 100644 --- a/zerver/lib/markdown/__init__.py +++ b/zerver/lib/markdown/__init__.py @@ -1821,25 +1821,7 @@ def prepare_linkifier_pattern(source: str) -> str: # We use an extended definition of 'whitespace' which is # equivalent to \p{White_Space} -- since \s in re2 only matches # ASCII spaces, and re2 does not support \p{White_Space}. - regex = rf""" - (?P<{BEFORE_CAPTURE_GROUP}> - ^ | - \s | {next_line} | \pZ | - ['"\(,:<] - ) - (?P<{OUTER_CAPTURE_GROUP}> - {source} - ) - (?P<{AFTER_CAPTURE_GROUP}> - $ | - [^\pL\pN] - ) - """ - # Strip out the spaces and newlines added to make the above - # legible -- re2 does not have the equivalent of the /x modifier - # that does this automatically. Note that we are careful to not - # strip _whitespace_, which would strip the literal \u0085 out. 
- return regex.replace(" ", "").replace("\n", "") + return rf"""(?P<{BEFORE_CAPTURE_GROUP}>^|\s|{next_line}|\pZ|['"\(,:<])(?P<{OUTER_CAPTURE_GROUP}>{source})(?P<{AFTER_CAPTURE_GROUP}>$|[^\pL\pN])""" # Given a regular expression pattern, linkifies groups that match it diff --git a/zerver/tests/test_markdown.py b/zerver/tests/test_markdown.py index 48b1a770a0..33c4eb93e1 100644 --- a/zerver/tests/test_markdown.py +++ b/zerver/tests/test_markdown.py @@ -1366,6 +1366,25 @@ class MarkdownTest(ZulipTestCase): [{"url": "https://example.com/A%20Test/%25%25%ba/123", "text": "url-123"}], ) + # Test spaces in the linkifier pattern + RealmFilter( + realm=realm, + pattern=r"community guidelines", + url_template="https://zulip.com/development-community/#community-norms", + ).save() + converted = markdown_convert("community guidelines", message_realm=realm, message=msg) + self.assertEqual( + converted.rendered_content, + '

<p><a href="https://zulip.com/development-community/#community-norms">community guidelines</a></p>

', + ) + converted = markdown_convert( + "please observe community guidelines here", message_realm=realm, message=msg + ) + self.assertEqual( + converted.rendered_content, + '

<p>please observe <a href="https://zulip.com/development-community/#community-norms">community guidelines</a> here</p>

', + ) + def test_multiple_matching_realm_patterns(self) -> None: realm = get_realm("zulip") self.check_add_linkifiers(